Amesos Package Browser (Single Doxygen Collection) Development
amesos_klu_solve.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === KLU_solve ============================================================ */
00003 /* ========================================================================== */
00004 
00005 /* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
00006  * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
00007  * performed.  Uses Numeric->Xwork as workspace (undefined on input and output),
00008  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
00009  * Numeric->Iwork).
00010  */
00011 
00012 #include "amesos_klu_internal.h"
00013 
00014 Int KLU_solve
00015 (
00016     /* inputs, not modified */
00017     KLU_symbolic *Symbolic,
00018     KLU_numeric *Numeric,
00019     Int d,        /* leading dimension of B */
00020     Int nrhs,       /* number of right-hand-sides */
00021 
00022     /* right-hand-side on input, overwritten with solution to Ax=b on output */
00023     double B [ ],     /* size n*nrhs, in column-oriented form, with
00024            * leading dimension d. */
00025     /* --------------- */
00026     KLU_common *Common
00027 )
00028 {
00029     Entry x [4], offik, s ;
00030     double rs, *Rs ;
00031     Entry *Offx, *X, *Bz, *Udiag ;
00032     Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
00033     Unit **LUbx ;
00034     Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
00035 
00036     /* ---------------------------------------------------------------------- */
00037     /* check inputs */
00038     /* ---------------------------------------------------------------------- */
00039 
00040     if (Common == NULL)
00041     {
00042   return (FALSE) ;
00043     }
00044     if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
00045   B == NULL)
00046     {
00047   Common->status = KLU_INVALID ;
00048   return (FALSE) ;
00049     }
00050     Common->status = KLU_OK ;
00051 
00052     /* ---------------------------------------------------------------------- */
00053     /* get the contents of the Symbolic object */
00054     /* ---------------------------------------------------------------------- */
00055 
00056     Bz = (Entry *) B ;
00057     n = Symbolic->n ;
00058     nblocks = Symbolic->nblocks ;
00059     Q = Symbolic->Q ;
00060     R = Symbolic->R ;
00061 
00062     /* ---------------------------------------------------------------------- */
00063     /* get the contents of the Numeric object */
00064     /* ---------------------------------------------------------------------- */
00065 
00066     ASSERT (nblocks == Numeric->nblocks) ;
00067     Pnum = Numeric->Pnum ;
00068     Offp = Numeric->Offp ;
00069     Offi = Numeric->Offi ;
00070     Offx = (Entry *) Numeric->Offx ;
00071 
00072     Lip  = Numeric->Lip ;
00073     Llen = Numeric->Llen ;
00074     Uip  = Numeric->Uip ;
00075     Ulen = Numeric->Ulen ;
00076     LUbx = (Unit **) Numeric->LUbx ;
00077     Udiag = Numeric->Udiag ;
00078 
00079     Rs = Numeric->Rs ;
00080     X = (Entry *) Numeric->Xwork ;
00081 
00082     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00083 
00084     /* ---------------------------------------------------------------------- */
00085     /* solve in chunks of 4 columns at a time */
00086     /* ---------------------------------------------------------------------- */
00087 
00088     for (chunk = 0 ; chunk < nrhs ; chunk += 4)
00089     {
00090 
00091   /* ------------------------------------------------------------------ */
00092   /* get the size of the current chunk */
00093   /* ------------------------------------------------------------------ */
00094 
00095   nr = MIN (nrhs - chunk, 4) ;
00096 
00097   /* ------------------------------------------------------------------ */
00098   /* scale and permute the right hand side, X = P*(R\B) */
00099   /* ------------------------------------------------------------------ */
00100 
00101   if (Rs == NULL)
00102   {
00103 
00104       /* no scaling */
00105       switch (nr)
00106       {
00107 
00108     case 1:
00109 
00110         for (k = 0 ; k < n ; k++)
00111         {
00112       X [k] = Bz [Pnum [k]] ;
00113         }
00114         break ;
00115 
00116     case 2:
00117 
00118         for (k = 0 ; k < n ; k++)
00119         {
00120       i = Pnum [k] ;
00121       X [2*k    ] = Bz [i      ] ;
00122       X [2*k + 1] = Bz  [i + d  ] ;
00123         }
00124         break ;
00125 
00126     case 3:
00127 
00128         for (k = 0 ; k < n ; k++)
00129         {
00130       i = Pnum [k] ;
00131       X [3*k    ] = Bz [i      ] ;
00132       X [3*k + 1] = Bz [i + d  ] ;
00133       X [3*k + 2] = Bz [i + d*2] ;
00134         }
00135         break ;
00136 
00137     case 4:
00138 
00139         for (k = 0 ; k < n ; k++)
00140         {
00141       i = Pnum [k] ;
00142       X [4*k    ] = Bz [i      ] ;
00143       X [4*k + 1] = Bz [i + d  ] ;
00144       X [4*k + 2] = Bz [i + d*2] ;
00145       X [4*k + 3] = Bz [i + d*3] ;
00146         }
00147         break ;
00148       }
00149 
00150   }
00151   else
00152   {
00153 
00154       switch (nr)
00155       {
00156 
00157     case 1:
00158 
00159         for (k = 0 ; k < n ; k++)
00160         {
00161       SCALE_DIV_ASSIGN (X [k], Bz  [Pnum [k]], Rs [k]) ;
00162         }
00163         break ;
00164 
00165     case 2:
00166 
00167         for (k = 0 ; k < n ; k++)
00168         {
00169       i = Pnum [k] ;
00170       rs = Rs [k] ;
00171       SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
00172       SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
00173         }
00174         break ;
00175 
00176     case 3:
00177 
00178         for (k = 0 ; k < n ; k++)
00179         {
00180       i = Pnum [k] ;
00181       rs = Rs [k] ;
00182       SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
00183       SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
00184       SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
00185         }
00186         break ;
00187 
00188     case 4:
00189 
00190         for (k = 0 ; k < n ; k++)
00191         {
00192       i = Pnum [k] ;
00193       rs = Rs [k] ;
00194       SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
00195       SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
00196       SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
00197       SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
00198         }
00199         break ;
00200       }
00201   }
00202 
00203   /* ------------------------------------------------------------------ */
00204   /* solve X = (L*U + Off)\X */
00205   /* ------------------------------------------------------------------ */
00206 
00207   for (block = nblocks-1 ; block >= 0 ; block--)
00208   {
00209 
00210       /* -------------------------------------------------------------- */
00211       /* the block of size nk is from rows/columns k1 to k2-1 */
00212       /* -------------------------------------------------------------- */
00213 
00214       k1 = R [block] ;
00215       k2 = R [block+1] ;
00216       nk = k2 - k1 ;
00217       PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
00218 
00219       /* solve the block system */
00220       if (nk == 1)
00221       {
00222     s = Udiag [k1] ;
00223     switch (nr)
00224     {
00225 
00226         case 1:
00227       DIV (X [k1], X [k1], s) ;
00228       break ;
00229 
00230         case 2:
00231       DIV (X [2*k1], X [2*k1], s) ;
00232       DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
00233       break ;
00234 
00235         case 3:
00236       DIV (X [3*k1], X [3*k1], s) ;
00237       DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
00238       DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
00239       break ;
00240 
00241         case 4:
00242       DIV (X [4*k1], X [4*k1], s) ;
00243       DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
00244       DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
00245       DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
00246       break ;
00247 
00248     }
00249       }
00250       else
00251       {
00252     KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
00253       X + nr*k1) ;
00254     KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
00255       Udiag + k1, nr, X + nr*k1) ;
00256       }
00257 
00258       /* -------------------------------------------------------------- */
00259       /* block back-substitution for the off-diagonal-block entries */
00260       /* -------------------------------------------------------------- */
00261 
00262       if (block > 0)
00263       {
00264     switch (nr)
00265     {
00266 
00267         case 1:
00268 
00269       for (k = k1 ; k < k2 ; k++)
00270       {
00271           pend = Offp [k+1] ;
00272           x [0] = X [k] ;
00273           for (p = Offp [k] ; p < pend ; p++)
00274           {
00275         MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
00276           }
00277       }
00278       break ;
00279 
00280         case 2:
00281 
00282       for (k = k1 ; k < k2 ; k++)
00283       {
00284           pend = Offp [k+1] ;
00285           x [0] = X [2*k    ] ;
00286           x [1] = X [2*k + 1] ;
00287           for (p = Offp [k] ; p < pend ; p++)
00288           {
00289         i = Offi [p] ;
00290         offik = Offx [p] ;
00291         MULT_SUB (X [2*i], offik, x [0]) ;
00292         MULT_SUB (X [2*i + 1], offik, x [1]) ;
00293           }
00294       }
00295       break ;
00296 
00297         case 3:
00298 
00299       for (k = k1 ; k < k2 ; k++)
00300       {
00301           pend = Offp [k+1] ;
00302           x [0] = X [3*k    ] ;
00303           x [1] = X [3*k + 1] ;
00304           x [2] = X [3*k + 2] ;
00305           for (p = Offp [k] ; p < pend ; p++)
00306           {
00307         i = Offi [p] ;
00308         offik = Offx [p] ;
00309         MULT_SUB (X [3*i], offik, x [0]) ;
00310         MULT_SUB (X [3*i + 1], offik, x [1]) ;
00311         MULT_SUB (X [3*i + 2], offik, x [2]) ;
00312           }
00313       }
00314       break ;
00315 
00316         case 4:
00317 
00318       for (k = k1 ; k < k2 ; k++)
00319       {
00320           pend = Offp [k+1] ;
00321           x [0] = X [4*k    ] ;
00322           x [1] = X [4*k + 1] ;
00323           x [2] = X [4*k + 2] ;
00324           x [3] = X [4*k + 3] ;
00325           for (p = Offp [k] ; p < pend ; p++)
00326           {
00327         i = Offi [p] ;
00328         offik = Offx [p] ;
00329         MULT_SUB (X [4*i], offik, x [0]) ;
00330         MULT_SUB (X [4*i + 1], offik, x [1]) ;
00331         MULT_SUB (X [4*i + 2], offik, x [2]) ;
00332         MULT_SUB (X [4*i + 3], offik, x [3]) ;
00333           }
00334       }
00335       break ;
00336     }
00337       }
00338   }
00339 
00340   /* ------------------------------------------------------------------ */
00341   /* permute the result, Bz  = Q*X */
00342   /* ------------------------------------------------------------------ */
00343 
00344   switch (nr)
00345   {
00346 
00347       case 1:
00348 
00349     for (k = 0 ; k < n ; k++)
00350     {
00351         Bz  [Q [k]] = X [k] ;
00352     }
00353     break ;
00354 
00355       case 2:
00356 
00357     for (k = 0 ; k < n ; k++)
00358     {
00359         i = Q [k] ;
00360         Bz  [i      ] = X [2*k    ] ;
00361         Bz  [i + d  ] = X [2*k + 1] ;
00362     }
00363     break ;
00364 
00365       case 3:
00366 
00367     for (k = 0 ; k < n ; k++)
00368     {
00369         i = Q [k] ;
00370         Bz  [i      ] = X [3*k    ] ;
00371         Bz  [i + d  ] = X [3*k + 1] ;
00372         Bz  [i + d*2] = X [3*k + 2] ;
00373     }
00374     break ;
00375 
00376       case 4:
00377 
00378     for (k = 0 ; k < n ; k++)
00379     {
00380         i = Q [k] ;
00381         Bz  [i      ] = X [4*k    ] ;
00382         Bz  [i + d  ] = X [4*k + 1] ;
00383         Bz  [i + d*2] = X [4*k + 2] ;
00384         Bz  [i + d*3] = X [4*k + 3] ;
00385     }
00386     break ;
00387   }
00388 
00389   /* ------------------------------------------------------------------ */
00390   /* go to the next chunk of B */
00391   /* ------------------------------------------------------------------ */
00392 
00393   Bz  += d*4 ;
00394     }
00395     return (TRUE) ;
00396 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines