Amesos Package Browser (Single Doxygen Collection) Development
amesos_klu_factor.c
Go to the documentation of this file.
00001 /* ========================================================================== */
00002 /* === KLU_factor =========================================================== */
00003 /* ========================================================================== */
00004 
00005 /* Factor the matrix, after ordering and analyzing it with KLU_analyze
00006  * or KLU_analyze_given.
00007  */
00008 
00009 #include "amesos_klu_internal.h"
00010 
00011 /* ========================================================================== */
00012 /* === KLU_factor2 ========================================================== */
00013 /* ========================================================================== */
00014 
00015 static void factor2
00016 (
00017     /* inputs, not modified */
00018     Int Ap [ ],   /* size n+1, column pointers */
00019     Int Ai [ ],   /* size nz, row indices */
00020     Entry Ax [ ],
00021     KLU_symbolic *Symbolic,
00022 
00023     /* inputs, modified on output: */
00024     KLU_numeric *Numeric,
00025     KLU_common *Common
00026 )
00027 {
00028     double lsize ;
00029     double *Lnz, *Rs ;
00030     Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
00031   *Lip, *Uip, *Llen, *Ulen ;
00032     Entry *Offx, *X, s, *Udiag ;
00033     Unit **LUbx ;
00034     Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
00035   nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
00036   max_unz_block ;
00037 
00038     /* ---------------------------------------------------------------------- */
00039     /* initializations */
00040     /* ---------------------------------------------------------------------- */
00041 
00042     /* get the contents of the Symbolic object */
00043     n = Symbolic->n ;
00044     P = Symbolic->P ;
00045     Q = Symbolic->Q ;
00046     R = Symbolic->R ;
00047     Lnz = Symbolic->Lnz ;
00048     nblocks = Symbolic->nblocks ;
00049     nzoff = Symbolic->nzoff ;
00050 
00051     Pnum = Numeric->Pnum ;
00052     Offp = Numeric->Offp ;
00053     Offi = Numeric->Offi ;
00054     Offx = (Entry *) Numeric->Offx ;
00055 
00056     Lip = Numeric->Lip ;
00057     Uip = Numeric->Uip ;
00058     Llen = Numeric->Llen ;
00059     Ulen = Numeric->Ulen ;
00060     LUbx = (Unit **) Numeric->LUbx ;
00061     Udiag = Numeric->Udiag ;
00062 
00063     Rs = Numeric->Rs ;
00064     Pinv = Numeric->Pinv ;
00065     X = (Entry *) Numeric->Xwork ;    /* X is of size n */
00066     Iwork = Numeric->Iwork ;      /* 5*maxblock for KLU_factor */
00067             /* 1*maxblock for Pblock */
00068     Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
00069     Common->nrealloc = 0 ;
00070     scale = Common->scale ;
00071     max_lnz_block = 1 ;
00072     max_unz_block = 1 ;
00073 
00074     /* compute the inverse of P from symbolic analysis.  Will be updated to
00075      * become the inverse of the numerical factorization when the factorization
00076      * is done, for use in KLU_refactor */
00077 #ifndef NDEBUG
00078     for (k = 0 ; k < n ; k++)
00079     {
00080   Pinv [k] = EMPTY ;
00081     }
00082 #endif
00083     for (k = 0 ; k < n ; k++)
00084     {
00085   ASSERT (P [k] >= 0 && P [k] < n) ;
00086   Pinv [P [k]] = k ;
00087     }
00088 #ifndef NDEBUG
00089     for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
00090 #endif
00091 
00092     lnz = 0 ;
00093     unz = 0 ;
00094     Common->noffdiag = 0 ;
00095     Offp [0] = 0 ;
00096 
00097     /* ---------------------------------------------------------------------- */
00098     /* optionally check input matrix and compute scale factors */
00099     /* ---------------------------------------------------------------------- */
00100 
00101     if (scale >= 0)
00102     {
00103   /* use Pnum as workspace. NOTE: scale factors are not yet permuted
00104    * according to the final pivot row ordering, so Rs [oldrow] is the
00105    * scale factor for A (oldrow,:), for the user's matrix A.  Pnum is
00106    * used as workspace in KLU_scale.  When the factorization is done,
00107    * the scale factors are permuted according to the final pivot row
00108    * permutation, so that Rs [k] is the scale factor for the kth row of
00109    * A(p,q) where p and q are the final row and column permutations. */
00110   KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;
00111   if (Common->status < KLU_OK)
00112   {
00113       /* matrix is invalid */
00114       return ;
00115   }
00116     }
00117 
00118 #ifndef NDEBUG
00119     if (scale > 0)
00120     {
00121   for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
00122     }
00123 #endif
00124 
00125     /* ---------------------------------------------------------------------- */
00126     /* factor each block using klu */
00127     /* ---------------------------------------------------------------------- */
00128 
00129     for (block = 0 ; block < nblocks ; block++)
00130     {
00131 
00132   /* ------------------------------------------------------------------ */
00133   /* the block is from rows/columns k1 to k2-1 */
00134   /* ------------------------------------------------------------------ */
00135 
00136   k1 = R [block] ;
00137   k2 = R [block+1] ;
00138   nk = k2 - k1 ;
00139   PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
00140 
00141   if (nk == 1)
00142   {
00143 
00144       /* -------------------------------------------------------------- */
00145       /* singleton case */
00146       /* -------------------------------------------------------------- */
00147 
00148       poff = Offp [k1] ;
00149       oldcol = Q [k1] ;
00150       pend = Ap [oldcol+1] ;
00151       CLEAR (s) ;
00152 
00153       if (scale <= 0)
00154       {
00155     /* no scaling */
00156     for (p = Ap [oldcol] ; p < pend ; p++)
00157     {
00158         oldrow = Ai [p] ;
00159         newrow = Pinv [oldrow] ;
00160         if (newrow < k1)
00161         {
00162       Offi [poff] = oldrow ;
00163       Offx [poff] = Ax [p] ;
00164       poff++ ;
00165         }
00166         else
00167         {
00168       ASSERT (newrow == k1) ;
00169       PRINTF (("singleton block %d", block)) ;
00170       PRINT_ENTRY (Ax [p]) ;
00171       s = Ax [p] ;
00172         }
00173     }
00174       }
00175       else
00176       {
00177     /* row scaling.  NOTE: scale factors are not yet permuted
00178      * according to the pivot row permutation, so Rs [oldrow] is
00179      * used below.  When the factorization is done, the scale
00180      * factors are permuted, so that Rs [newrow] will be used in
00181      * klu_solve, klu_tsolve, and klu_rgrowth */
00182     for (p = Ap [oldcol] ; p < pend ; p++)
00183     {
00184         oldrow = Ai [p] ;
00185         newrow = Pinv [oldrow] ;
00186         if (newrow < k1)
00187         {
00188       Offi [poff] = oldrow ;
00189       /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
00190       SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
00191       poff++ ;
00192         }
00193         else
00194         {
00195       ASSERT (newrow == k1) ;
00196       PRINTF (("singleton block %d ", block)) ;
00197       PRINT_ENTRY (Ax[p]) ;
00198       SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
00199         }
00200     }
00201       }
00202 
00203       Udiag [k1] = s ;
00204 
00205       if (IS_ZERO (s))
00206       {
00207     /* singular singleton */
00208     Common->status = KLU_SINGULAR ;
00209     Common->numerical_rank = k1 ;
00210     Common->singular_col = oldcol ;
00211     if (Common->halt_if_singular)
00212     {
00213         return ;
00214     }
00215       }
00216 
00217       Offp [k1+1] = poff ;
00218       Pnum [k1] = P [k1] ;
00219       lnz++ ;
00220       unz++ ;
00221 
00222   }
00223   else
00224   {
00225 
00226       /* -------------------------------------------------------------- */
00227       /* construct and factorize the kth block */
00228       /* -------------------------------------------------------------- */
00229 
00230       if (Lnz [block] < 0)
00231       {
00232     /* COLAMD was used - no estimate of fill-in */
00233     /* use 10 times the nnz in A, plus n */
00234     lsize = -(Common->initmem) ;
00235       }
00236       else
00237       {
00238     lsize = Common->initmem_amd * Lnz [block] + nk ;
00239       }
00240 
00241       /* allocates 1 arrays: LUbx [block] */
00242       Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
00243         lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
00244         Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
00245         X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
00246 
00247       if (Common->status < KLU_OK ||
00248          (Common->status == KLU_SINGULAR && Common->halt_if_singular))
00249       {
00250     /* out of memory, invalid inputs, or singular */
00251     return ;
00252       }
00253 
00254       PRINTF (("\n----------------------- L %d:\n", block)) ;
00255       ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
00256       PRINTF (("\n----------------------- U %d:\n", block)) ;
00257       ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
00258 
00259       /* -------------------------------------------------------------- */
00260       /* get statistics */
00261       /* -------------------------------------------------------------- */
00262 
00263       lnz += lnz_block ;
00264       unz += unz_block ;
00265       max_lnz_block = MAX (max_lnz_block, lnz_block) ;
00266       max_unz_block = MAX (max_unz_block, unz_block) ;
00267 
00268       if (Lnz [block] == EMPTY)
00269       {
00270     /* revise estimate for subsequent factorization */
00271     Lnz [block] = MAX (lnz_block, unz_block) ;
00272       }
00273 
00274       /* -------------------------------------------------------------- */
00275       /* combine the klu row ordering with the symbolic pre-ordering */
00276       /* -------------------------------------------------------------- */
00277 
00278       PRINTF (("Pnum, 1-based:\n")) ;
00279       for (k = 0 ; k < nk ; k++)
00280       {
00281     ASSERT (k + k1 < n) ;
00282     ASSERT (Pblock [k] + k1 < n) ;
00283     Pnum [k + k1] = P [Pblock [k] + k1] ;
00284     PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
00285         k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
00286       }
00287 
00288       /* the local pivot row permutation Pblock is no longer needed */
00289   }
00290     }
00291     ASSERT (nzoff == Offp [n]) ;
00292     PRINTF (("\n------------------- Off diagonal entries:\n")) ;
00293     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00294 
00295     Numeric->lnz = lnz ;
00296     Numeric->unz = unz ;
00297     Numeric->max_lnz_block = max_lnz_block ;
00298     Numeric->max_unz_block = max_unz_block ;
00299 
00300     /* compute the inverse of Pnum */
00301 #ifndef NDEBUG
00302     for (k = 0 ; k < n ; k++)
00303     {
00304   Pinv [k] = EMPTY ;
00305     }
00306 #endif
00307     for (k = 0 ; k < n ; k++)
00308     {
00309   ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
00310   Pinv [Pnum [k]] = k ;
00311     }
00312 #ifndef NDEBUG
00313     for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
00314 #endif
00315 
00316     /* permute scale factors Rs according to pivotal row order */
00317     if (scale > 0)
00318     {
00319   for (k = 0 ; k < n ; k++)
00320   {
00321       REAL (X [k]) = Rs [Pnum [k]] ;
00322   }
00323   for (k = 0 ; k < n ; k++)
00324   {
00325       Rs [k] = REAL (X [k]) ;
00326   }
00327     }
00328 
00329     PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
00330     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00331 
00332     /* apply the pivot row permutations to the off-diagonal entries */
00333     for (p = 0 ; p < nzoff ; p++)
00334     {
00335   ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
00336   Offi [p] = Pinv [Offi [p]] ;
00337     }
00338 
00339     PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
00340     ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
00341 
00342 #ifndef NDEBUG
00343     {
00344   PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
00345   Entry ss, *Udiag = Numeric->Udiag ;
00346   for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
00347   {
00348       k1 = R [block] ;
00349       k2 = R [block+1] ;
00350       nk = k2 - k1 ;
00351       PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
00352       if (nk == 1)
00353       {
00354     PRINTF (("singleton  ")) ;
00355     /* ENTRY_PRINT (singleton [block]) ; */
00356     ss = Udiag [k1] ;
00357     PRINT_ENTRY (ss) ;
00358       }
00359       else
00360       {
00361     Int *Lip, *Uip, *Llen, *Ulen ;
00362     Unit *LU ;
00363     Lip = Numeric->Lip + k1 ;
00364     Llen = Numeric->Llen + k1 ;
00365     LU = (Unit *) Numeric->LUbx [block] ;
00366     PRINTF (("\n---- L block %d\n", block));
00367     ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
00368     Uip = Numeric->Uip + k1 ;
00369     Ulen = Numeric->Ulen + k1 ;
00370     PRINTF (("\n---- U block %d\n", block)) ;
00371     ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
00372       }
00373   }
00374     }
00375 #endif
00376 }
00377 
00378 
00379 
00380 /* ========================================================================== */
00381 /* === KLU_factor =========================================================== */
00382 /* ========================================================================== */
00383 
00384 KLU_numeric *KLU_factor   /* returns NULL if error, or a valid
00385            KLU_numeric object if successful */
00386 (
00387     /* --- inputs --- */
00388     Int Ap [ ],   /* size n+1, column pointers */
00389     Int Ai [ ],   /* size nz, row indices */
00390     double Ax [ ],
00391     KLU_symbolic *Symbolic,
00392     /* -------------- */
00393     KLU_common *Common
00394 )
00395 {
00396     Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
00397     Int *R ;
00398     KLU_numeric *Numeric ;
00399     size_t n1, nzoff1, s, b6, n3 ;
00400 #ifdef KLU_ENABLE_OPENMP
00401     int num_threads;
00402 #endif
00403 
00404     if (Common == NULL)
00405     {
00406   return (NULL) ;
00407     }
00408     Common->status = KLU_OK ;
00409     Common->numerical_rank = EMPTY ;
00410     Common->singular_col = EMPTY ;
00411 
00412     /* ---------------------------------------------------------------------- */
00413     /* get the contents of the Symbolic object */
00414     /* ---------------------------------------------------------------------- */
00415 
00416     /* check for a valid Symbolic object */
00417     if (Symbolic == NULL)
00418     {
00419   Common->status = KLU_INVALID ;
00420   return (NULL) ;
00421     }
00422 
00423     n = Symbolic->n ;
00424     nzoff = Symbolic->nzoff ;
00425     nblocks = Symbolic->nblocks ;
00426     maxblock = Symbolic->maxblock ;
00427     R = Symbolic->R ;
00428     PRINTF (("KLU_factor:  n %d nzoff %d nblocks %d maxblock %d\n",
00429   n, nzoff, nblocks, maxblock)) ;
00430 
00431     /* ---------------------------------------------------------------------- */
00432     /* get control parameters and make sure they are in the proper range */
00433     /* ---------------------------------------------------------------------- */
00434 
00435     Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
00436     Common->initmem = MAX (1.0, Common->initmem) ;
00437     Common->tol = MIN (Common->tol, 1.0) ;
00438     Common->tol = MAX (0.0, Common->tol) ;
00439     Common->memgrow = MAX (1.0, Common->memgrow) ;
00440 
00441     /* ---------------------------------------------------------------------- */
00442     /* allocate the Numeric object  */
00443     /* ---------------------------------------------------------------------- */
00444 
00445     /* this will not cause size_t overflow (already checked by KLU_symbolic) */
00446     n1 = ((size_t) n) + 1 ;
00447     nzoff1 = ((size_t) nzoff) + 1 ;
00448 
00449     Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ;
00450     if (Common->status < KLU_OK)
00451     {
00452   /* out of memory */
00453   Common->status = KLU_OUT_OF_MEMORY ;
00454   return (NULL) ;
00455     }
00456     Numeric->n = n ;
00457     Numeric->nblocks = nblocks ;
00458     Numeric->nzoff = nzoff ;
00459     Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ;
00460     Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ;
00461     Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ;
00462     Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
00463 
00464     Numeric->Lip  = KLU_malloc (n, sizeof (Int), Common) ;
00465     Numeric->Uip  = KLU_malloc (n, sizeof (Int), Common) ;
00466     Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ;
00467     Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ;
00468 
00469     Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ;
00470 
00471     Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ;
00472     if (Numeric->LUbx != NULL)
00473     {
00474   for (k = 0 ; k < nblocks ; k++)
00475   {
00476       Numeric->LUbx [k] = NULL ;
00477   }
00478     }
00479 
00480     Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
00481 
00482     if (Common->scale > 0)
00483     {
00484   Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ;
00485     }
00486     else
00487     {
00488   /* no scaling */
00489   Numeric->Rs = NULL ;
00490     }
00491 
00492     Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ;
00493 
00494     /* allocate permanent workspace for factorization and solve.  Note that the
00495      * solver will use an Xwork of size 4n, whereas the factorization codes use
00496      * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
00497      * uses an Xwork of size 2n.  Total size is:
00498      *
00499      *    n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
00500      *
00501      * If OpenMP is enabled the solve uses an Xwork of size num_threads * 4n.
00502      */
00503 #ifdef KLU_ENABLE_OPENMP
00504 #pragma omp parallel
00505     num_threads = omp_get_num_threads();
00506     s = KLU_mult_size_t (n, num_threads * sizeof (Entry), &ok) ;
00507     n3 = KLU_mult_size_t (n, 3 * num_threads * sizeof (Entry), &ok) ;
00508 #else
00509     s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
00510     n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
00511 #endif
00512     b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
00513     Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
00514     Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
00515     Numeric->Xwork = Numeric->Work ;
00516     Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
00517     if (!ok || Common->status < KLU_OK)
00518     {
00519   /* out of memory or problem too large */
00520   Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
00521   KLU_free_numeric (&Numeric, Common) ;
00522   return (NULL) ;
00523     }
00524 
00525     /* ---------------------------------------------------------------------- */
00526     /* factorize the blocks */
00527     /* ---------------------------------------------------------------------- */
00528 
00529     factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
00530 
00531     /* ---------------------------------------------------------------------- */
00532     /* return or free the Numeric object */
00533     /* ---------------------------------------------------------------------- */
00534 
00535     if (Common->status < KLU_OK)
00536     {
00537   /* out of memory or inputs invalid */
00538   KLU_free_numeric (&Numeric, Common) ;
00539     }
00540     else if (Common->status == KLU_SINGULAR)
00541     {
00542   if (Common->halt_if_singular)
00543   {
00544       /* Matrix is singular, and the Numeric object is only partially
00545        * defined because we halted early.  This is the default case for
00546        * a singular matrix. */
00547       KLU_free_numeric (&Numeric, Common) ;
00548   }
00549     }
00550     else if (Common->status == KLU_OK)
00551     {
00552   /* successful non-singular factorization */
00553   Common->numerical_rank = n ;
00554   Common->singular_col = n ;
00555     }
00556     return (Numeric) ;
00557 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines