Amesos Package Browser (Single Doxygen Collection) Development
amesos_klu_l_solve.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === KLU_solve ============================================================ */
00003 /* ========================================================================== */
00004 
00005 /* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
00006  * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
00007  * performed.  Uses Numeric->Xwork as workspace (undefined on input and output),
00008  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
00009  * Numeric->Iwork).
00010  */
00011 
00012 /* This file should make the long int version of KLU */
00013 #define DLONG 1
00014 
00015 #include "amesos_klu_internal.h"
00016 
00017 Int KLU_solve
00018 (
00019     /* inputs, not modified */
00020     KLU_symbolic *Symbolic,
00021     KLU_numeric *Numeric,
00022     Int d,        /* leading dimension of B */
00023     Int nrhs,       /* number of right-hand-sides */
00024 
00025     /* right-hand-side on input, overwritten with solution to Ax=b on output */
00026     double B [ ],     /* size n*nrhs, in column-oriented form, with
00027            * leading dimension d. */
00028     /* --------------- */
00029     KLU_common *Common
00030 )
00031 {
00032     Entry x [4], offik, s ;
00033     double rs, *Rs ;
00034     Entry *Offx, *X, *Bz, *Udiag ;
00035     Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
00036     Unit **LUbx ;
00037     Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
00038 
00039     /* ---------------------------------------------------------------------- */
00040     /* check inputs */
00041     /* ---------------------------------------------------------------------- */
00042 
00043     if (Common == NULL)
00044     {
00045   return (FALSE) ;
00046     }
00047     if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
00048   B == NULL)
00049     {
00050   Common->status = KLU_INVALID ;
00051   return (FALSE) ;
00052     }
00053     Common->status = KLU_OK ;
00054 
00055     /* ---------------------------------------------------------------------- */
00056     /* get the contents of the Symbolic object */
00057     /* ---------------------------------------------------------------------- */
00058 
00059     Bz = (Entry *) B ;
00060     n = Symbolic->n ;
00061     nblocks = Symbolic->nblocks ;
00062     Q = Symbolic->Q ;
00063     R = Symbolic->R ;
00064 
00065     /* ---------------------------------------------------------------------- */
00066     /* get the contents of the Numeric object */
00067     /* ---------------------------------------------------------------------- */
00068 
00069     ASSERT (nblocks == Numeric->nblocks) ;
00070     Pnum = Numeric->Pnum ;
00071     Offp = Numeric->Offp ;
00072     Offi = Numeric->Offi ;
00073     Offx = (Entry *) Numeric->Offx ;
00074 
00075     Lip  = Numeric->Lip ;
00076     Llen = Numeric->Llen ;
00077     Uip  = Numeric->Uip ;
00078     Ulen = Numeric->Ulen ;
00079     LUbx = (Unit **) Numeric->LUbx ;
00080     Udiag = Numeric->Udiag ;
00081 
00082     Rs = Numeric->Rs ;
00083     X = (Entry *) Numeric->Xwork ;
00084 
00085     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00086 
00087     /* ---------------------------------------------------------------------- */
00088     /* solve in chunks of 4 columns at a time */
00089     /* ---------------------------------------------------------------------- */
00090 
00091     for (chunk = 0 ; chunk < nrhs ; chunk += 4)
00092     {
00093 
00094   /* ------------------------------------------------------------------ */
00095   /* get the size of the current chunk */
00096   /* ------------------------------------------------------------------ */
00097 
00098   nr = MIN (nrhs - chunk, 4) ;
00099 
00100   /* ------------------------------------------------------------------ */
00101   /* scale and permute the right hand side, X = P*(R\B) */
00102   /* ------------------------------------------------------------------ */
00103 
00104   if (Rs == NULL)
00105   {
00106 
00107       /* no scaling */
00108       switch (nr)
00109       {
00110 
00111     case 1:
00112 
00113         for (k = 0 ; k < n ; k++)
00114         {
00115       X [k] = Bz [Pnum [k]] ;
00116         }
00117         break ;
00118 
00119     case 2:
00120 
00121         for (k = 0 ; k < n ; k++)
00122         {
00123       i = Pnum [k] ;
00124       X [2*k    ] = Bz [i      ] ;
00125       X [2*k + 1] = Bz  [i + d  ] ;
00126         }
00127         break ;
00128 
00129     case 3:
00130 
00131         for (k = 0 ; k < n ; k++)
00132         {
00133       i = Pnum [k] ;
00134       X [3*k    ] = Bz [i      ] ;
00135       X [3*k + 1] = Bz [i + d  ] ;
00136       X [3*k + 2] = Bz [i + d*2] ;
00137         }
00138         break ;
00139 
00140     case 4:
00141 
00142         for (k = 0 ; k < n ; k++)
00143         {
00144       i = Pnum [k] ;
00145       X [4*k    ] = Bz [i      ] ;
00146       X [4*k + 1] = Bz [i + d  ] ;
00147       X [4*k + 2] = Bz [i + d*2] ;
00148       X [4*k + 3] = Bz [i + d*3] ;
00149         }
00150         break ;
00151       }
00152 
00153   }
00154   else
00155   {
00156 
00157       switch (nr)
00158       {
00159 
00160     case 1:
00161 
00162         for (k = 0 ; k < n ; k++)
00163         {
00164       SCALE_DIV_ASSIGN (X [k], Bz  [Pnum [k]], Rs [k]) ;
00165         }
00166         break ;
00167 
00168     case 2:
00169 
00170         for (k = 0 ; k < n ; k++)
00171         {
00172       i = Pnum [k] ;
00173       rs = Rs [k] ;
00174       SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
00175       SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
00176         }
00177         break ;
00178 
00179     case 3:
00180 
00181         for (k = 0 ; k < n ; k++)
00182         {
00183       i = Pnum [k] ;
00184       rs = Rs [k] ;
00185       SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
00186       SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
00187       SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
00188         }
00189         break ;
00190 
00191     case 4:
00192 
00193         for (k = 0 ; k < n ; k++)
00194         {
00195       i = Pnum [k] ;
00196       rs = Rs [k] ;
00197       SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
00198       SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
00199       SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
00200       SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
00201         }
00202         break ;
00203       }
00204   }
00205 
00206   /* ------------------------------------------------------------------ */
00207   /* solve X = (L*U + Off)\X */
00208   /* ------------------------------------------------------------------ */
00209 
00210   for (block = nblocks-1 ; block >= 0 ; block--)
00211   {
00212 
00213       /* -------------------------------------------------------------- */
00214       /* the block of size nk is from rows/columns k1 to k2-1 */
00215       /* -------------------------------------------------------------- */
00216 
00217       k1 = R [block] ;
00218       k2 = R [block+1] ;
00219       nk = k2 - k1 ;
00220       PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
00221 
00222       /* solve the block system */
00223       if (nk == 1)
00224       {
00225     s = Udiag [k1] ;
00226     switch (nr)
00227     {
00228 
00229         case 1:
00230       DIV (X [k1], X [k1], s) ;
00231       break ;
00232 
00233         case 2:
00234       DIV (X [2*k1], X [2*k1], s) ;
00235       DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
00236       break ;
00237 
00238         case 3:
00239       DIV (X [3*k1], X [3*k1], s) ;
00240       DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
00241       DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
00242       break ;
00243 
00244         case 4:
00245       DIV (X [4*k1], X [4*k1], s) ;
00246       DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
00247       DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
00248       DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
00249       break ;
00250 
00251     }
00252       }
00253       else
00254       {
00255     KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
00256       X + nr*k1) ;
00257     KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
00258       Udiag + k1, nr, X + nr*k1) ;
00259       }
00260 
00261       /* -------------------------------------------------------------- */
00262       /* block back-substitution for the off-diagonal-block entries */
00263       /* -------------------------------------------------------------- */
00264 
00265       if (block > 0)
00266       {
00267     switch (nr)
00268     {
00269 
00270         case 1:
00271 
00272       for (k = k1 ; k < k2 ; k++)
00273       {
00274           pend = Offp [k+1] ;
00275           x [0] = X [k] ;
00276           for (p = Offp [k] ; p < pend ; p++)
00277           {
00278         MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
00279           }
00280       }
00281       break ;
00282 
00283         case 2:
00284 
00285       for (k = k1 ; k < k2 ; k++)
00286       {
00287           pend = Offp [k+1] ;
00288           x [0] = X [2*k    ] ;
00289           x [1] = X [2*k + 1] ;
00290           for (p = Offp [k] ; p < pend ; p++)
00291           {
00292         i = Offi [p] ;
00293         offik = Offx [p] ;
00294         MULT_SUB (X [2*i], offik, x [0]) ;
00295         MULT_SUB (X [2*i + 1], offik, x [1]) ;
00296           }
00297       }
00298       break ;
00299 
00300         case 3:
00301 
00302       for (k = k1 ; k < k2 ; k++)
00303       {
00304           pend = Offp [k+1] ;
00305           x [0] = X [3*k    ] ;
00306           x [1] = X [3*k + 1] ;
00307           x [2] = X [3*k + 2] ;
00308           for (p = Offp [k] ; p < pend ; p++)
00309           {
00310         i = Offi [p] ;
00311         offik = Offx [p] ;
00312         MULT_SUB (X [3*i], offik, x [0]) ;
00313         MULT_SUB (X [3*i + 1], offik, x [1]) ;
00314         MULT_SUB (X [3*i + 2], offik, x [2]) ;
00315           }
00316       }
00317       break ;
00318 
00319         case 4:
00320 
00321       for (k = k1 ; k < k2 ; k++)
00322       {
00323           pend = Offp [k+1] ;
00324           x [0] = X [4*k    ] ;
00325           x [1] = X [4*k + 1] ;
00326           x [2] = X [4*k + 2] ;
00327           x [3] = X [4*k + 3] ;
00328           for (p = Offp [k] ; p < pend ; p++)
00329           {
00330         i = Offi [p] ;
00331         offik = Offx [p] ;
00332         MULT_SUB (X [4*i], offik, x [0]) ;
00333         MULT_SUB (X [4*i + 1], offik, x [1]) ;
00334         MULT_SUB (X [4*i + 2], offik, x [2]) ;
00335         MULT_SUB (X [4*i + 3], offik, x [3]) ;
00336           }
00337       }
00338       break ;
00339     }
00340       }
00341   }
00342 
00343   /* ------------------------------------------------------------------ */
00344   /* permute the result, Bz  = Q*X */
00345   /* ------------------------------------------------------------------ */
00346 
00347   switch (nr)
00348   {
00349 
00350       case 1:
00351 
00352     for (k = 0 ; k < n ; k++)
00353     {
00354         Bz  [Q [k]] = X [k] ;
00355     }
00356     break ;
00357 
00358       case 2:
00359 
00360     for (k = 0 ; k < n ; k++)
00361     {
00362         i = Q [k] ;
00363         Bz  [i      ] = X [2*k    ] ;
00364         Bz  [i + d  ] = X [2*k + 1] ;
00365     }
00366     break ;
00367 
00368       case 3:
00369 
00370     for (k = 0 ; k < n ; k++)
00371     {
00372         i = Q [k] ;
00373         Bz  [i      ] = X [3*k    ] ;
00374         Bz  [i + d  ] = X [3*k + 1] ;
00375         Bz  [i + d*2] = X [3*k + 2] ;
00376     }
00377     break ;
00378 
00379       case 4:
00380 
00381     for (k = 0 ; k < n ; k++)
00382     {
00383         i = Q [k] ;
00384         Bz  [i      ] = X [4*k    ] ;
00385         Bz  [i + d  ] = X [4*k + 1] ;
00386         Bz  [i + d*2] = X [4*k + 2] ;
00387         Bz  [i + d*3] = X [4*k + 3] ;
00388     }
00389     break ;
00390   }
00391 
00392   /* ------------------------------------------------------------------ */
00393   /* go to the next chunk of B */
00394   /* ------------------------------------------------------------------ */
00395 
00396   Bz  += d*4 ;
00397     }
00398     return (TRUE) ;
00399 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines