Amesos Package Browser (Single Doxygen Collection) Development
amesos_klu_tsolve.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === KLU_tsolve =========================================================== */
00003 /* ========================================================================== */
00004 
00005 /* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
00006  * (or KLU_analyze_given) and KLU_factor.  Note that no iterative refinement is
00007  * performed.  Uses Numeric->Xwork as workspace (undefined on input and output),
00008  * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
00009  * Numeric->Iwork).
00010  */
00011 
00012 #include "amesos_klu_internal.h"
00013 
00014 Int KLU_tsolve
00015 (
00016     /* inputs, not modified */
00017     KLU_symbolic *Symbolic,
00018     KLU_numeric *Numeric,
00019     Int d,        /* leading dimension of B */
00020     Int nrhs,       /* number of right-hand-sides */
00021 
00022     /* right-hand-side on input, overwritten with solution to Ax=b on output */
00023     double B [ ],     /* size n*nrhs, in column-oriented form, with
00024            * leading dimension d. */
00025 #ifdef COMPLEX
00026     Int conj_solve,     /* TRUE for conjugate transpose solve, FALSE for
00027            * array transpose solve.  Used for the complex
00028            * case only. */
00029 #endif
00030     /* --------------- */
00031     KLU_common *Common
00032 )
00033 {
00034     Entry x [4], offik, s ;
00035     double rs, *Rs ;
00036     Entry *Offx, *X, *Bz, *Udiag ;
00037     Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
00038     Unit **LUbx ;
00039     Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
00040 
00041     /* ---------------------------------------------------------------------- */
00042     /* check inputs */
00043     /* ---------------------------------------------------------------------- */
00044 
00045     if (Common == NULL)
00046     {
00047   return (FALSE) ;
00048     }
00049     if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
00050   B == NULL)
00051     {
00052   Common->status = KLU_INVALID ;
00053   return (FALSE) ;
00054     }
00055     Common->status = KLU_OK ;
00056 
00057     /* ---------------------------------------------------------------------- */
00058     /* get the contents of the Symbolic object */
00059     /* ---------------------------------------------------------------------- */
00060 
00061     Bz = (Entry *) B ;
00062     n = Symbolic->n ;
00063     nblocks = Symbolic->nblocks ;
00064     Q = Symbolic->Q ;
00065     R = Symbolic->R ;
00066 
00067     /* ---------------------------------------------------------------------- */
00068     /* get the contents of the Numeric object */
00069     /* ---------------------------------------------------------------------- */
00070 
00071     ASSERT (nblocks == Numeric->nblocks) ;
00072     Pnum = Numeric->Pnum ;
00073     Offp = Numeric->Offp ;
00074     Offi = Numeric->Offi ;
00075     Offx = (Entry *) Numeric->Offx ;
00076 
00077     Lip  = Numeric->Lip ;
00078     Llen = Numeric->Llen ;
00079     Uip  = Numeric->Uip ;
00080     Ulen = Numeric->Ulen ;
00081     LUbx = (Unit **) Numeric->LUbx ;
00082     Udiag = Numeric->Udiag ;
00083 
00084     Rs = Numeric->Rs ;
00085     X = (Entry *) Numeric->Xwork ;
00086     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00087 
00088     /* ---------------------------------------------------------------------- */
00089     /* solve in chunks of 4 columns at a time */
00090     /* ---------------------------------------------------------------------- */
00091 
00092     for (chunk = 0 ; chunk < nrhs ; chunk += 4)
00093     {
00094 
00095   /* ------------------------------------------------------------------ */
00096   /* get the size of the current chunk */
00097   /* ------------------------------------------------------------------ */
00098 
00099   nr = MIN (nrhs - chunk, 4) ;
00100 
00101   /* ------------------------------------------------------------------ */
00102   /* permute the right hand side, X = Q'*B */
00103   /* ------------------------------------------------------------------ */
00104 
00105   switch (nr)
00106   {
00107 
00108       case 1:
00109 
00110     for (k = 0 ; k < n ; k++)
00111     {
00112         X [k] = Bz  [Q [k]] ;
00113     }
00114     break ;
00115 
00116       case 2:
00117 
00118     for (k = 0 ; k < n ; k++)
00119     {
00120         i = Q [k] ;
00121         X [2*k    ] = Bz [i      ] ;
00122         X [2*k + 1] = Bz [i + d  ] ;
00123     }
00124     break ;
00125 
00126       case 3:
00127 
00128     for (k = 0 ; k < n ; k++)
00129     {
00130         i = Q [k] ;
00131         X [3*k    ] = Bz [i      ] ;
00132         X [3*k + 1] = Bz [i + d  ] ;
00133         X [3*k + 2] = Bz [i + d*2] ;
00134     }
00135     break ;
00136 
00137       case 4:
00138 
00139     for (k = 0 ; k < n ; k++)
00140     {
00141         i = Q [k] ;
00142         X [4*k    ] = Bz [i      ] ;
00143         X [4*k + 1] = Bz [i + d  ] ;
00144         X [4*k + 2] = Bz [i + d*2] ;
00145         X [4*k + 3] = Bz [i + d*3] ;
00146     }
00147     break ;
00148 
00149   }
00150 
00151   /* ------------------------------------------------------------------ */
00152   /* solve X = (L*U + Off)'\X */
00153   /* ------------------------------------------------------------------ */
00154 
00155   for (block = 0 ; block < nblocks ; block++)
00156   {
00157 
00158       /* -------------------------------------------------------------- */
00159       /* the block of size nk is from rows/columns k1 to k2-1 */
00160       /* -------------------------------------------------------------- */
00161 
00162       k1 = R [block] ;
00163       k2 = R [block+1] ;
00164       nk = k2 - k1 ;
00165       PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
00166 
00167       /* -------------------------------------------------------------- */
00168       /* block back-substitution for the off-diagonal-block entries */
00169       /* -------------------------------------------------------------- */
00170 
00171       if (block > 0)
00172       {
00173     switch (nr)
00174         {
00175 
00176         case 1:
00177 
00178       for (k = k1 ; k < k2 ; k++)
00179       {
00180           pend = Offp [k+1] ;
00181           for (p = Offp [k] ; p < pend ; p++)
00182           {
00183 #ifdef COMPLEX
00184         if (conj_solve)
00185         {
00186             MULT_SUB_CONJ (X [k], X [Offi [p]],
00187               Offx [p]) ;
00188         }
00189         else
00190 #endif
00191         {
00192             MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
00193         }
00194           }
00195       }
00196       break ;
00197 
00198         case 2:
00199 
00200       for (k = k1 ; k < k2 ; k++)
00201       {
00202           pend = Offp [k+1] ;
00203           x [0] = X [2*k    ] ;
00204           x [1] = X [2*k + 1] ;
00205           for (p = Offp [k] ; p < pend ; p++)
00206           {
00207         i = Offi [p] ;
00208 #ifdef COMPLEX
00209         if (conj_solve)
00210         {
00211             CONJ (offik, Offx [p]) ;
00212         }
00213         else
00214 #endif
00215         {
00216             offik = Offx [p] ;
00217         }
00218         MULT_SUB (x [0], offik, X [2*i]) ;
00219         MULT_SUB (x [1], offik, X [2*i + 1]) ;
00220           }
00221           X [2*k    ] = x [0] ;
00222           X [2*k + 1] = x [1] ;
00223       }
00224       break ;
00225 
00226         case 3:
00227 
00228       for (k = k1 ; k < k2 ; k++)
00229       {
00230           pend = Offp [k+1] ;
00231           x [0] = X [3*k    ] ;
00232           x [1] = X [3*k + 1] ;
00233           x [2] = X [3*k + 2] ;
00234           for (p = Offp [k] ; p < pend ; p++)
00235           {
00236         i = Offi [p] ;
00237 #ifdef COMPLEX
00238         if (conj_solve)
00239         {
00240             CONJ (offik, Offx [p]) ;
00241         }
00242         else
00243 #endif
00244         {
00245             offik = Offx [p] ;
00246         }
00247         MULT_SUB (x [0], offik, X [3*i]) ;
00248         MULT_SUB (x [1], offik, X [3*i + 1]) ;
00249         MULT_SUB (x [2], offik, X [3*i + 2]) ;
00250           }
00251           X [3*k    ] = x [0] ;
00252           X [3*k + 1] = x [1] ;
00253           X [3*k + 2] = x [2] ;
00254       }
00255       break ;
00256 
00257         case 4:
00258 
00259       for (k = k1 ; k < k2 ; k++)
00260       {
00261           pend = Offp [k+1] ;
00262           x [0] = X [4*k    ] ;
00263           x [1] = X [4*k + 1] ;
00264           x [2] = X [4*k + 2] ;
00265           x [3] = X [4*k + 3] ;
00266           for (p = Offp [k] ; p < pend ; p++)
00267           {
00268         i = Offi [p] ;
00269 #ifdef COMPLEX
00270         if (conj_solve)
00271         {
00272             CONJ(offik, Offx [p]) ;
00273         }
00274         else
00275 #endif
00276         {
00277             offik = Offx [p] ;
00278         }
00279         MULT_SUB (x [0], offik, X [4*i]) ;
00280         MULT_SUB (x [1], offik, X [4*i + 1]) ;
00281         MULT_SUB (x [2], offik, X [4*i + 2]) ;
00282         MULT_SUB (x [3], offik, X [4*i + 3]) ;
00283           }
00284           X [4*k    ] = x [0] ;
00285           X [4*k + 1] = x [1] ;
00286           X [4*k + 2] = x [2] ;
00287           X [4*k + 3] = x [3] ;
00288       }
00289       break ;
00290         }
00291       }
00292 
00293       /* -------------------------------------------------------------- */
00294       /* solve the block system */
00295       /* -------------------------------------------------------------- */
00296 
00297       if (nk == 1)
00298       {
00299 #ifdef COMPLEX
00300     if (conj_solve)
00301     {
00302         CONJ (s, Udiag [k1]) ;
00303     }
00304     else
00305 #endif
00306     {
00307         s = Udiag [k1] ;
00308     }
00309     switch (nr)
00310     {
00311 
00312         case 1:
00313       DIV (X [k1], X [k1], s) ;
00314       break ;
00315 
00316         case 2:
00317       DIV (X [2*k1], X [2*k1], s) ;
00318       DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
00319       break ;
00320 
00321         case 3:
00322       DIV (X [3*k1], X [3*k1], s) ;
00323       DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
00324       DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
00325       break ;
00326 
00327         case 4:
00328       DIV (X [4*k1], X [4*k1], s) ;
00329       DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
00330       DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
00331       DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
00332       break ;
00333 
00334     }
00335       }
00336       else
00337       {
00338     KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
00339       Udiag + k1, nr,
00340 #ifdef COMPLEX
00341       conj_solve,
00342 #endif
00343       X + nr*k1) ;
00344     KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
00345 #ifdef COMPLEX
00346       conj_solve,
00347 #endif
00348       X + nr*k1) ;
00349       }
00350   }
00351 
00352   /* ------------------------------------------------------------------ */
00353   /* scale and permute the result, Bz  = P'(R\X) */
00354   /* ------------------------------------------------------------------ */
00355 
00356   if (Rs == NULL)
00357   {
00358 
00359       /* no scaling */
00360       switch (nr)
00361       {
00362 
00363     case 1:
00364 
00365         for (k = 0 ; k < n ; k++)
00366         {
00367       Bz  [Pnum [k]] = X [k] ;
00368         }
00369         break ;
00370 
00371     case 2:
00372 
00373         for (k = 0 ; k < n ; k++)
00374         {
00375       i = Pnum [k] ;
00376       Bz  [i      ] = X [2*k    ] ;
00377       Bz  [i + d  ] = X [2*k + 1] ;
00378         }
00379         break ;
00380 
00381     case 3:
00382 
00383         for (k = 0 ; k < n ; k++)
00384         {
00385       i = Pnum [k] ;
00386       Bz  [i      ] = X [3*k    ] ;
00387       Bz  [i + d  ] = X [3*k + 1] ;
00388       Bz  [i + d*2] = X [3*k + 2] ;
00389         }
00390         break ;
00391 
00392     case 4:
00393 
00394         for (k = 0 ; k < n ; k++)
00395         {
00396       i = Pnum [k] ;
00397       Bz  [i      ] = X [4*k    ] ;
00398       Bz  [i + d  ] = X [4*k + 1] ;
00399       Bz  [i + d*2] = X [4*k + 2] ;
00400       Bz  [i + d*3] = X [4*k + 3] ;
00401         }
00402         break ;
00403       }
00404 
00405   }
00406   else
00407   {
00408 
00409       switch (nr)
00410       {
00411 
00412     case 1:
00413 
00414         for (k = 0 ; k < n ; k++)
00415         {
00416       SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
00417         }
00418         break ;
00419 
00420     case 2:
00421 
00422         for (k = 0 ; k < n ; k++)
00423         {
00424       i = Pnum [k] ;
00425       rs = Rs [k] ;
00426       SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
00427       SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
00428         }
00429         break ;
00430 
00431     case 3:
00432 
00433         for (k = 0 ; k < n ; k++)
00434         {
00435       i = Pnum [k] ;
00436       rs = Rs [k] ;
00437       SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
00438       SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
00439       SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
00440         }
00441         break ;
00442 
00443     case 4:
00444 
00445         for (k = 0 ; k < n ; k++)
00446         {
00447       i = Pnum [k] ;
00448       rs = Rs [k] ;
00449       SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
00450       SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
00451       SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
00452       SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
00453         }
00454         break ;
00455       }
00456   }
00457 
00458   /* ------------------------------------------------------------------ */
00459   /* go to the next chunk of B */
00460   /* ------------------------------------------------------------------ */
00461 
00462   Bz  += d*4 ;
00463     }
00464     return (TRUE) ;
00465 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines