Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Teuchos_BLAS.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 // Kris
00043 // 06.16.03 -- Start over from scratch
00044 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
00045 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
00046 //          -- Added warning messages for generic calls
00047 // 07.08.03 -- Move into Teuchos package/namespace
00048 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
00049 //             * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
00050 //             * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
00051 //             * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
00052 //             * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
00053 //             * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
00054 //          -- Removed warning messages for generic calls
00055 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
00056 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
00057 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
00058 
00059 #ifndef _TEUCHOS_BLAS_HPP_
00060 #define _TEUCHOS_BLAS_HPP_
00061 
00069 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00070 the appropriate declaration of one will need to be added back into
00071 functions that include the macro:
00072 */
00073 #if defined (INTEL_CXML)
00074         unsigned int one=1;
00075 #endif
00076 
00077 #ifdef CHAR_MACRO
00078 #undef CHAR_MACRO
00079 #endif
00080 #if defined (INTEL_CXML)
00081 #define CHAR_MACRO(char_var) &char_var, one
00082 #else
00083 #define CHAR_MACRO(char_var) &char_var
00084 #endif
00085 
00086 #include "Teuchos_ConfigDefs.hpp"
00087 #include "Teuchos_ScalarTraits.hpp"
00088 #include "Teuchos_OrdinalTraits.hpp"
00089 #include "Teuchos_BLAS_types.hpp"
00090 #include "Teuchos_TestForException.hpp"
00091 
00125 namespace Teuchos
00126 {
00127   extern TEUCHOS_LIB_DLL_EXPORT const char ESideChar[];
00128   extern TEUCHOS_LIB_DLL_EXPORT const char ETranspChar[];
00129   extern TEUCHOS_LIB_DLL_EXPORT const char EUploChar[];
00130   extern TEUCHOS_LIB_DLL_EXPORT const char EDiagChar[];
00131 
00133 
00138   template<typename OrdinalType, typename ScalarType>
00139   class DefaultBLASImpl
00140   {    
00141 
00142     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00143     
00144   public:
00146 
00147     
00149     inline DefaultBLASImpl(void) {}
00150 
00152     inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00153 
00155     inline virtual ~DefaultBLASImpl(void) {}
00157 
00159 
00160 
00162     void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00163 
00165     void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00166 
00168     void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00169 
00171     void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00172 
00174     template <typename alpha_type, typename x_type>
00175     void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00176 
00178     typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00179 
00181     template <typename x_type, typename y_type>
00182     ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
00183 
00185     typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00186 
00188     OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00189 
00191 
00193 
00194 
00196     template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00197     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, 
00198         const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
00199 
00201     template <typename A_type>
00202     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, 
00203         const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00204 
00206     template <typename alpha_type, typename x_type, typename y_type>
00207     void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, 
00208        const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00210     
00212 
00213 
00215     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00216     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00217 
00219     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00220     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00221 
00223     template <typename alpha_type, typename A_type, typename beta_type>
00224     void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00225 
00227     template <typename alpha_type, typename A_type>
00228     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00229                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00230 
00232     template <typename alpha_type, typename A_type>
00233     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00234                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00236   };
00237 
00238   template<typename OrdinalType, typename ScalarType>
00239   class TEUCHOS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
00240   {    
00241 
00242     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00243     
00244   public:
00246 
00247     
00249     inline BLAS(void) {}
00250 
00252     inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00253 
00255     inline virtual ~BLAS(void) {}
00257   };
00258 
00259 //------------------------------------------------------------------------------------------
00260 //      LEVEL 1 BLAS ROUTINES  
00261 //------------------------------------------------------------------------------------------
00262     
00263   template<typename OrdinalType, typename ScalarType>
00264   void DefaultBLASImpl<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
00265   {
00266     ScalarType roe, alpha;
00267     ScalarType zero = ScalarTraits<ScalarType>::zero();
00268     ScalarType one = ScalarTraits<ScalarType>::one();
00269     MagnitudeType scale, norm;
00270     MagnitudeType m_one = ScalarTraits<MagnitudeType>::one();
00271     MagnitudeType m_zero = ScalarTraits<MagnitudeType>::zero();
00272 
00273     roe = *db;
00274     if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
00275     scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
00276     if ( scale == m_zero ) // There is nothing to do.
00277     {
00278       *c = m_one;
00279       *s = zero;
00280       *da = zero; *db = zero;
00281     } 
00282     else if ( *da == zero ) // Still nothing to do.
00283     { 
00284       *c = m_zero;
00285       *s = one;
00286       *da = *db; *db = zero;
00287     } 
00288     else 
00289     { // Compute the Givens rotation.
00290       norm = scale*ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) ) );
00291       alpha = roe / ScalarTraits<ScalarType>::magnitude(roe);
00292       *c = ScalarTraits<ScalarType>::magnitude(*da) / norm;
00293       *s = alpha * ScalarTraits<ScalarType>::conjugate(*db) / norm;
00294       *db = one;
00295       if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
00296       if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
00297      *c != ScalarTraits<MagnitudeType>::zero() ) { *db = one / *c; }
00298       *da = norm * alpha;
00299     }
00300   } /* end ROTG */
00301       
00302   template<typename OrdinalType, typename ScalarType>
00303   void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00304   {
00305     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00306     ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
00307     if (n <= 0) return;
00308     if (incx==1 && incy==1) {
00309       for(OrdinalType i=0; i<n; ++i) {
00310         ScalarType temp = *c*dx[i] + sconj*dy[i];
00311         dy[i] = *c*dy[i] - sconj*dx[i];
00312         dx[i] = temp;
00313       }
00314     }
00315     else {
00316       OrdinalType ix = 0, iy = 0;
00317       if (incx < izero) ix = (-n+1)*incx;
00318       if (incy < izero) iy = (-n+1)*incy;
00319       for(OrdinalType i=0; i<n; ++i) {
00320         ScalarType temp = *c*dx[ix] + sconj*dy[iy];
00321         dy[iy] = *c*dy[iy] - sconj*dx[ix];
00322         dx[ix] = temp;
00323         ix += incx;
00324         iy += incy;
00325       }
00326     }
00327   }
00328 
00329   template<typename OrdinalType, typename ScalarType>
00330   void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00331   {
00332     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00333     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00334     OrdinalType i, ix = izero;
00335     if ( n > izero ) {
00336         // Set the initial index (ix).
00337         if (incx < izero) { ix = (-n+ione)*incx; } 
00338         // Scale the std::vector.
00339         for(i = izero; i < n; i++)
00340         {
00341             x[ix] *= alpha;
00342             ix += incx;
00343         }
00344     }
00345   } /* end SCAL */
00346 
00347   template<typename OrdinalType, typename ScalarType>
00348   void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00349   {
00350     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00351     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00352     OrdinalType i, ix = izero, iy = izero;
00353     if ( n > izero ) {
00354   // Set the initial indices (ix, iy).
00355       if (incx < izero) { ix = (-n+ione)*incx; }
00356       if (incy < izero) { iy = (-n+ione)*incy; }
00357 
00358         for(i = izero; i < n; i++)
00359           {
00360       y[iy] = x[ix];
00361       ix += incx;
00362       iy += incy;
00363           }
00364       }
00365   } /* end COPY */
00366 
00367   template<typename OrdinalType, typename ScalarType>
00368   template <typename alpha_type, typename x_type>
00369   void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00370   {
00371     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00372     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00373     OrdinalType i, ix = izero, iy = izero;
00374     if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
00375       {
00376   // Set the initial indices (ix, iy).
00377       if (incx < izero) { ix = (-n+ione)*incx; }
00378       if (incy < izero) { iy = (-n+ione)*incy; }
00379 
00380         for(i = izero; i < n; i++)
00381           {
00382       y[iy] += alpha * x[ix];
00383       ix += incx;
00384       iy += incy;
00385           }
00386       }
00387   } /* end AXPY */
00388 
00389   template<typename OrdinalType, typename ScalarType>
00390   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00391   {
00392     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00393     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00394     typename ScalarTraits<ScalarType>::magnitudeType result = 
00395       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00396     OrdinalType i, ix = izero;
00397     if( n > izero ) {
00398   // Set the initial indices
00399   if (incx < izero) { ix = (-n+ione)*incx; }
00400 
00401       for(i = izero; i < n; i++)
00402           {
00403       result += ScalarTraits<ScalarType>::magnitude(x[ix]);
00404       ix += incx;
00405           }
00406     } 
00407    return result;
00408   } /* end ASUM */
00409   
00410   template<typename OrdinalType, typename ScalarType>
00411   template <typename x_type, typename y_type>
00412   ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
00413   {
00414     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00415     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00416     ScalarType result = ScalarTraits<ScalarType>::zero();
00417     OrdinalType i, ix = izero, iy = izero;
00418     if( n > izero ) 
00419       {
00420   // Set the initial indices (ix,iy).       
00421   if (incx < izero) { ix = (-n+ione)*incx; }
00422   if (incy < izero) { iy = (-n+ione)*incy; }
00423 
00424   for(i = izero; i < n; i++)
00425     {
00426       result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
00427       ix += incx;
00428       iy += incy;
00429     }
00430       }
00431     return result;
00432   } /* end DOT */
00433   
00434   template<typename OrdinalType, typename ScalarType>
00435   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00436   {
00437     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00438     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00439     typename ScalarTraits<ScalarType>::magnitudeType result = 
00440       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00441     OrdinalType i, ix = izero;
00442     if ( n > izero ) 
00443       {
00444   // Set the initial index.
00445   if (incx < izero) { ix = (-n+ione)*incx; }  
00446     
00447   for(i = izero; i < n; i++)
00448           {
00449       result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]);
00450       ix += incx;
00451           }
00452       result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result);
00453       } 
00454     return result;
00455   } /* end NRM2 */
00456   
00457   template<typename OrdinalType, typename ScalarType>
00458   OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00459   {
00460     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00461     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00462     OrdinalType result = izero, ix = izero, i;
00463     typename ScalarTraits<ScalarType>::magnitudeType maxval =
00464       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00465 
00466     if ( n > izero ) 
00467       {
00468   if (incx < izero) { ix = (-n+ione)*incx; }
00469   maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00470   ix += incx;
00471       for(i = ione; i < n; i++)
00472           {
00473       if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval)
00474         {
00475         result = i;
00476           maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00477         }
00478       ix += incx;
00479     }
00480       }
00481     return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
00482   } /* end IAMAX */
00483 
00484 //------------------------------------------------------------------------------------------
00485 //      LEVEL 2 BLAS ROUTINES
00486 //------------------------------------------------------------------------------------------
00487   template<typename OrdinalType, typename ScalarType>
00488   template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00489   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
00490   {
00491     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00492     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00493     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00494     beta_type beta_zero = ScalarTraits<beta_type>::zero();
00495     x_type x_zero = ScalarTraits<x_type>::zero();
00496     ScalarType y_zero = ScalarTraits<ScalarType>::zero();
00497     beta_type beta_one = ScalarTraits<beta_type>::one();
00498     bool noConj = true;
00499     bool BadArgument = false;
00500 
00501     // Quick return if there is nothing to do!
00502     if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
00503     
00504     // Otherwise, we need to check the argument list.
00505     if( m < izero ) { 
00506   std::cout << "BLAS::GEMV Error: M == " << m << std::endl;     
00507   BadArgument = true;
00508     }
00509     if( n < izero ) { 
00510   std::cout << "BLAS::GEMV Error: N == " << n << std::endl;     
00511   BadArgument = true;
00512     }
00513     if( lda < m ) { 
00514   std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;      
00515   BadArgument = true;
00516     }
00517     if( incx == izero ) {
00518   std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00519   BadArgument = true;
00520     }
00521     if( incy == izero ) {
00522   std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00523   BadArgument = true;
00524     }
00525 
00526     if(!BadArgument) {
00527       OrdinalType i, j, lenx, leny, ix, iy, jx, jy; 
00528       OrdinalType kx = izero, ky = izero;
00529       ScalarType temp;
00530 
00531       // Determine the lengths of the vectors x and y.
00532       if(ETranspChar[trans] == 'N') {
00533   lenx = n;
00534   leny = m;
00535       } else {
00536   lenx = m;
00537   leny = n;
00538       }
00539  
00540       // Determine if this is a conjugate tranpose 
00541       noConj = (ETranspChar[trans] == 'T'); 
00542 
00543       // Set the starting pointers for the vectors x and y if incx/y < 0.
00544       if (incx < izero ) { kx =  (ione - lenx)*incx; }
00545       if (incy < izero ) { ky =  (ione - leny)*incy; }
00546 
00547       // Form y = beta*y
00548       ix = kx; iy = ky;
00549       if(beta != beta_one) {
00550   if (incy == ione) {
00551     if (beta == beta_zero) {
00552       for(i = izero; i < leny; i++) { y[i] = y_zero; }
00553     } else {
00554       for(i = izero; i < leny; i++) { y[i] *= beta; }
00555     }
00556   } else {
00557     if (beta == beta_zero) {
00558       for(i = izero; i < leny; i++) {
00559         y[iy] = y_zero;
00560         iy += incy;
00561       }
00562     } else {
00563       for(i = izero; i < leny; i++) {
00564         y[iy] *= beta;
00565         iy += incy;
00566       }
00567     }
00568   }
00569       }
00570   
00571       // Return if we don't have to do anything more.
00572       if(alpha == alpha_zero) { return; }
00573 
00574       if( ETranspChar[trans] == 'N' ) {
00575   // Form y = alpha*A*y
00576   jx = kx;
00577   if (incy == ione) {
00578     for(j = izero; j < n; j++) {
00579       if (x[jx] != x_zero) {
00580         temp = alpha*x[jx];
00581         for(i = izero; i < m; i++) {
00582     y[i] += temp*A[j*lda + i];
00583         }
00584       }
00585       jx += incx;
00586     }
00587   } else {
00588     for(j = izero; j < n; j++) {
00589       if (x[jx] != x_zero) {
00590         temp = alpha*x[jx];
00591         iy = ky;
00592         for(i = izero; i < m; i++) {
00593     y[iy] += temp*A[j*lda + i];
00594     iy += incy;
00595         }
00596       }
00597       jx += incx;
00598     }
00599   }
00600       } else {
00601   jy = ky;
00602   if (incx == ione) {
00603     for(j = izero; j < n; j++) {
00604       temp = y_zero;
00605             if ( noConj ) {
00606               for(i = izero; i < m; i++) {
00607           temp += A[j*lda + i]*x[i];
00608               }
00609             } else {
00610               for(i = izero; i < m; i++) {
00611           temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00612               }
00613             } 
00614       y[jy] += alpha*temp;
00615       jy += incy;
00616     }
00617   } else {
00618     for(j = izero; j < n; j++) {
00619       temp = y_zero;
00620       ix = kx;
00621             if ( noConj ) {
00622               for (i = izero; i < m; i++) {
00623           temp += A[j*lda + i]*x[ix];
00624           ix += incx;
00625               }
00626             } else {
00627               for (i = izero; i < m; i++) {
00628           temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00629           ix += incx;
00630               }
00631             }
00632       y[jy] += alpha*temp;
00633       jy += incy;
00634     }
00635   }
00636       }
00637     } /* if (!BadArgument) */
00638   } /* end GEMV */
00639 
00640  template<typename OrdinalType, typename ScalarType>
00641  template <typename A_type>
00642  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00643   {
00644     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00645     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00646     ScalarType zero = ScalarTraits<ScalarType>::zero();
00647     bool BadArgument = false;
00648     bool noConj = true;
00649 
00650     // Quick return if there is nothing to do!
00651     if( n == izero ){ return; }
00652     
00653     // Otherwise, we need to check the argument list.
00654     if( n < izero ) { 
00655       std::cout << "BLAS::TRMV Error: N == " << n << std::endl;     
00656       BadArgument = true;
00657     }
00658     if( lda < n ) { 
00659       std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;      
00660       BadArgument = true;
00661     }
00662     if( incx == izero ) {
00663       std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00664       BadArgument = true;
00665     }
00666 
00667     if(!BadArgument) {
00668       OrdinalType i, j, ix, jx, kx = izero;
00669       ScalarType temp;
00670       bool noUnit = (EDiagChar[diag] == 'N');
00671    
00672       // Determine if this is a conjugate tranpose 
00673       noConj = (ETranspChar[trans] == 'T'); 
00674 
00675       // Set the starting pointer for the std::vector x if incx < 0.
00676       if (incx < izero) { kx = (-n+ione)*incx; }
00677 
00678       // Start the operations for a nontransposed triangular matrix 
00679       if (ETranspChar[trans] == 'N') {
00680   /* Compute x = A*x */
00681   if (EUploChar[uplo] == 'U') {
00682     /* A is an upper triangular matrix */
00683     if (incx == ione) {
00684       for (j=izero; j<n; j++) {
00685         if (x[j] != zero) {
00686     temp = x[j];
00687     for (i=izero; i<j; i++) {
00688       x[i] += temp*A[j*lda + i];
00689     }
00690     if ( noUnit ) 
00691       x[j] *= A[j*lda + j];
00692         }
00693       }
00694     } else {
00695       jx = kx;
00696       for (j=izero; j<n; j++) {
00697         if (x[jx] != zero) {
00698     temp = x[jx];
00699     ix = kx;
00700     for (i=izero; i<j; i++) {
00701       x[ix] += temp*A[j*lda + i];
00702       ix += incx;
00703     }
00704     if ( noUnit )
00705       x[jx] *= A[j*lda + j];
00706         }
00707         jx += incx;
00708       }
00709     } /* if (incx == ione) */
00710   } else { /* A is a lower triangular matrix */
00711     if (incx == ione) {
00712       for (j=n-ione; j>-ione; j--) {
00713         if (x[j] != zero) {
00714     temp = x[j];
00715     for (i=n-ione; i>j; i--) {
00716       x[i] += temp*A[j*lda + i];
00717     }
00718     if ( noUnit )
00719       x[j] *= A[j*lda + j];
00720         }
00721       }
00722     } else {
00723       kx += (n-ione)*incx;
00724       jx = kx;
00725       for (j=n-ione; j>-ione; j--) {
00726         if (x[jx] != zero) {
00727     temp = x[jx];
00728     ix = kx;
00729     for (i=n-ione; i>j; i--) {
00730       x[ix] += temp*A[j*lda + i];
00731       ix -= incx;
00732     }
00733     if ( noUnit ) 
00734       x[jx] *= A[j*lda + j];
00735         }
00736         jx -= incx;
00737       }
00738     }
00739   } /* if (EUploChar[uplo]=='U') */
00740       } else { /* A is transposed/conjugated */
00741   /* Compute x = A'*x */
00742   if (EUploChar[uplo]=='U') {
00743     /* A is an upper triangular matrix */
00744     if (incx == ione) {
00745       for (j=n-ione; j>-ione; j--) {
00746         temp = x[j];
00747               if ( noConj ) {
00748                 if ( noUnit )
00749                   temp *= A[j*lda + j];
00750                 for (i=j-ione; i>-ione; i--) {
00751                   temp += A[j*lda + i]*x[i];
00752                 }
00753               } else {
00754                 if ( noUnit )
00755                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00756                 for (i=j-ione; i>-ione; i--) {
00757                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00758                 }
00759               }
00760               x[j] = temp;
00761       }
00762     } else {
00763       jx = kx + (n-ione)*incx;
00764       for (j=n-ione; j>-ione; j--) {
00765         temp = x[jx];
00766         ix = jx;
00767               if ( noConj ) {
00768                 if ( noUnit )
00769                   temp *= A[j*lda + j];
00770                 for (i=j-ione; i>-ione; i--) {
00771                   ix -= incx;
00772                   temp += A[j*lda + i]*x[ix];
00773           }
00774               } else {
00775                 if ( noUnit )
00776                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00777                 for (i=j-ione; i>-ione; i--) {
00778                   ix -= incx;
00779                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00780           }
00781               }
00782         x[jx] = temp;
00783         jx -= incx;
00784       }
00785     }
00786   } else {
00787     /* A is a lower triangular matrix */
00788     if (incx == ione) {
00789       for (j=izero; j<n; j++) {
00790         temp = x[j];
00791               if ( noConj ) {
00792                 if ( noUnit )
00793                   temp *= A[j*lda + j];
00794                 for (i=j+ione; i<n; i++) {
00795                   temp += A[j*lda + i]*x[i];
00796                 }
00797               } else {
00798                 if ( noUnit )
00799                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00800                 for (i=j+ione; i<n; i++) {
00801                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00802                 }
00803               }
00804         x[j] = temp;
00805       }
00806     } else {
00807       jx = kx;
00808       for (j=izero; j<n; j++) {
00809         temp = x[jx];
00810         ix = jx;
00811               if ( noConj ) {
00812                 if ( noUnit ) 
00813                   temp *= A[j*lda + j];
00814                 for (i=j+ione; i<n; i++) {
00815                   ix += incx;
00816                   temp += A[j*lda + i]*x[ix];
00817                 }
00818               } else {
00819                 if ( noUnit ) 
00820                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00821                 for (i=j+ione; i<n; i++) {
00822                   ix += incx;
00823                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00824                 }
00825               }
00826         x[jx] = temp;
00827         jx += incx;       
00828       }
00829     }
00830   } /* if (EUploChar[uplo]=='U') */
00831       } /* if (ETranspChar[trans]=='N') */
00832     } /* if (!BadArgument) */
00833   } /* end TRMV */
00834         
00835   template<typename OrdinalType, typename ScalarType>
00836   template <typename alpha_type, typename x_type, typename y_type>
00837   void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
00838   {
00839     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00840     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00841     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00842     y_type y_zero = ScalarTraits<y_type>::zero();
00843     bool BadArgument = false;
00844 
00845     TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
00846       "Teuchos::BLAS::GER() does not currently support complex data types.");
00847 
00848     // Quick return if there is nothing to do!
00849     if( m == izero || n == izero || alpha == alpha_zero ){ return; }
00850     
00851     // Otherwise, we need to check the argument list.
00852     if( m < izero ) { 
00853   std::cout << "BLAS::GER Error: M == " << m << std::endl;      
00854   BadArgument = true;
00855     }
00856     if( n < izero ) { 
00857   std::cout << "BLAS::GER Error: N == " << n << std::endl;      
00858   BadArgument = true;
00859     }
00860     if( lda < m ) { 
00861   std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;     
00862   BadArgument = true;
00863     }
00864     if( incx == 0 ) {
00865   std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
00866   BadArgument = true;
00867     }
00868     if( incy == 0 ) {
00869   std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
00870   BadArgument = true;
00871     }
00872 
00873     if(!BadArgument) {
00874       OrdinalType i, j, ix, jy = izero, kx = izero;
00875       ScalarType temp;
00876 
00877       // Set the starting pointers for the vectors x and y if incx/y < 0.
00878       if (incx < izero) { kx = (-m+ione)*incx; }
00879       if (incy < izero) { jy = (-n+ione)*incy; }
00880 
00881       // Start the operations for incx == 1
00882       if( incx == ione ) {
00883   for( j=izero; j<n; j++ ) {
00884     if ( y[jy] != y_zero ) {
00885       temp = alpha*y[jy];
00886       for ( i=izero; i<m; i++ ) {
00887         A[j*lda + i] += x[i]*temp;
00888       }
00889     }
00890     jy += incy;
00891   }
00892       } 
00893       else { // Start the operations for incx != 1
00894   for( j=izero; j<n; j++ ) {
00895     if ( y[jy] != y_zero ) {
00896       temp = alpha*y[jy];
00897       ix = kx;
00898       for( i=izero; i<m; i++ ) {
00899         A[j*lda + i] += x[ix]*temp;
00900         ix += incx;
00901       }
00902     }
00903     jy += incy;
00904   }
00905       }
00906     } /* if(!BadArgument) */
00907   } /* end GER */
00908   
00909 //------------------------------------------------------------------------------------------
00910 //      LEVEL 3 BLAS ROUTINES
00911 //------------------------------------------------------------------------------------------
00912         
00913   template<typename OrdinalType, typename ScalarType>
00914   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00915   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
00916   {
00917 
00918     typedef TypeNameTraits<OrdinalType> OTNT;
00919     typedef TypeNameTraits<ScalarType> STNT;
00920 
00921     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00922     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00923     beta_type beta_zero = ScalarTraits<beta_type>::zero();
00924     B_type B_zero = ScalarTraits<B_type>::zero();
00925     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
00926     beta_type beta_one = ScalarTraits<beta_type>::one();
00927     OrdinalType i, j, p;
00928     OrdinalType NRowA = m, NRowB = k;
00929     ScalarType temp;
00930     bool BadArgument = false;
00931     bool conjA = false, conjB = false;
00932 
00933     // Change dimensions of matrix if either matrix is transposed
00934     if( !(ETranspChar[transa]=='N') ) {
00935       NRowA = k;
00936     }
00937     if( !(ETranspChar[transb]=='N') ) {
00938       NRowB = n;
00939     }
00940 
00941     // Quick return if there is nothing to do!
00942     if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
00943     if( m < izero ) { 
00944       std::cout << "BLAS::GEMM Error: M == " << m << std::endl;     
00945       BadArgument = true;
00946     }
00947     if( n < izero ) { 
00948       std::cout << "BLAS::GEMM Error: N == " << n << std::endl;     
00949       BadArgument = true;
00950     }
00951     if( k < izero ) { 
00952       std::cout << "BLAS::GEMM Error: K == " << k << std::endl;     
00953       BadArgument = true;
00954     }
00955     if( lda < NRowA ) { 
00956       std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;      
00957       BadArgument = true;
00958     }
00959     if( ldb < NRowB ) { 
00960       std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;      
00961       BadArgument = true;
00962     }
00963      if( ldc < m ) { 
00964       std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;      
00965       BadArgument = true;
00966     }
00967 
00968     if(!BadArgument) {
00969 
00970       // Determine if this is a conjugate tranpose 
00971       conjA = (ETranspChar[transa] == 'C'); 
00972       conjB = (ETranspChar[transb] == 'C'); 
00973 
00974       // Only need to scale the resulting matrix C.
00975       if( alpha == alpha_zero ) {
00976   if( beta == beta_zero ) {
00977     for (j=izero; j<n; j++) {
00978       for (i=izero; i<m; i++) {
00979         C[j*ldc + i] = C_zero;
00980       }
00981     }
00982   } else {
00983     for (j=izero; j<n; j++) {
00984       for (i=izero; i<m; i++) {
00985         C[j*ldc + i] *= beta;
00986       }
00987     }
00988   }
00989   return;
00990       }
00991       //
00992       // Now start the operations.
00993       //
00994       if ( ETranspChar[transb]=='N' ) {
00995   if ( ETranspChar[transa]=='N' ) {
00996     // Compute C = alpha*A*B + beta*C
00997     for (j=izero; j<n; j++) {
00998       if( beta == beta_zero ) {
00999         for (i=izero; i<m; i++){
01000     C[j*ldc + i] = C_zero;
01001         }
01002       } else if( beta != beta_one ) {
01003         for (i=izero; i<m; i++){
01004     C[j*ldc + i] *= beta;
01005         }
01006       }
01007       for (p=izero; p<k; p++){
01008         if (B[j*ldb + p] != B_zero ){
01009     temp = alpha*B[j*ldb + p];
01010     for (i=izero; i<m; i++) {
01011       C[j*ldc + i] += temp*A[p*lda + i];
01012     }
01013         }
01014       }
01015     }
01016   } else if ( conjA ) {
01017           // Compute C = alpha*conj(A')*B + beta*C
01018           for (j=izero; j<n; j++) {
01019             for (i=izero; i<m; i++) {
01020               temp = C_zero;
01021               for (p=izero; p<k; p++) {
01022                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p];
01023               }
01024               if (beta == beta_zero) {
01025                 C[j*ldc + i] = alpha*temp;
01026               } else {
01027                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01028               }
01029             }
01030           }
01031         } else {
01032     // Compute C = alpha*A'*B + beta*C
01033     for (j=izero; j<n; j++) {
01034       for (i=izero; i<m; i++) {
01035         temp = C_zero;
01036         for (p=izero; p<k; p++) {
01037     temp += A[i*lda + p]*B[j*ldb + p];
01038         }
01039         if (beta == beta_zero) {
01040     C[j*ldc + i] = alpha*temp;
01041         } else {
01042     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01043         }
01044       }
01045     }
01046   }
01047       } else if ( ETranspChar[transa]=='N' ) {
01048         if ( conjB ) {
01049           // Compute C = alpha*A*conj(B') + beta*C
01050           for (j=izero; j<n; j++) {
01051             if (beta == beta_zero) {
01052               for (i=izero; i<m; i++) {
01053                 C[j*ldc + i] = C_zero;
01054               }
01055             } else if ( beta != beta_one ) {
01056               for (i=izero; i<m; i++) {
01057                 C[j*ldc + i] *= beta;
01058               }
01059             }
01060             for (p=izero; p<k; p++) {
01061               if (B[p*ldb + j] != B_zero) {
01062                 temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01063                 for (i=izero; i<m; i++) {
01064                   C[j*ldc + i] += temp*A[p*lda + i];
01065                 }
01066               }
01067             }
01068           }
01069         } else {
01070     // Compute C = alpha*A*B' + beta*C
01071     for (j=izero; j<n; j++) {
01072       if (beta == beta_zero) {
01073         for (i=izero; i<m; i++) {
01074     C[j*ldc + i] = C_zero;
01075         } 
01076       } else if ( beta != beta_one ) {
01077         for (i=izero; i<m; i++) {
01078     C[j*ldc + i] *= beta;
01079         }
01080       }
01081       for (p=izero; p<k; p++) {
01082         if (B[p*ldb + j] != B_zero) {
01083     temp = alpha*B[p*ldb + j];
01084     for (i=izero; i<m; i++) {
01085       C[j*ldc + i] += temp*A[p*lda + i];
01086     }
01087         }
01088       }
01089     }
01090   } 
01091       } else if ( conjA ) {
01092         if ( conjB ) {
01093           // Compute C = alpha*conj(A')*conj(B') + beta*C
01094           for (j=izero; j<n; j++) {
01095             for (i=izero; i<m; i++) {
01096               temp = C_zero;
01097               for (p=izero; p<k; p++) {
01098                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01099                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01100               }
01101               if (beta == beta_zero) {
01102                 C[j*ldc + i] = alpha*temp;
01103               } else {
01104                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01105               }
01106             }
01107           }
01108         } else {
01109           // Compute C = alpha*conj(A')*B' + beta*C
01110           for (j=izero; j<n; j++) {
01111             for (i=izero; i<m; i++) {
01112               temp = C_zero;
01113               for (p=izero; p<k; p++) {
01114                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01115                       * B[p*ldb + j];
01116               }
01117               if (beta == beta_zero) {
01118                 C[j*ldc + i] = alpha*temp;
01119               } else {
01120                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01121               }
01122             }
01123           }
01124         }
01125      } else {
01126        if ( conjB ) {
01127          // Compute C = alpha*A'*conj(B') + beta*C
01128          for (j=izero; j<n; j++) {
01129             for (i=izero; i<m; i++) {
01130               temp = C_zero;
01131               for (p=izero; p<k; p++) {
01132                 temp += A[i*lda + p]
01133                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01134               }
01135               if (beta == beta_zero) {
01136                 C[j*ldc + i] = alpha*temp;
01137               } else {
01138                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01139               }
01140             }
01141           }
01142         } else {
01143     // Compute C = alpha*A'*B' + beta*C
01144     for (j=izero; j<n; j++) {
01145       for (i=izero; i<m; i++) {
01146         temp = C_zero;
01147         for (p=izero; p<k; p++) {
01148     temp += A[i*lda + p]*B[p*ldb + j];
01149         }
01150         if (beta == beta_zero) {
01151     C[j*ldc + i] = alpha*temp;
01152         } else {
01153     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01154         }
01155       }
01156     }
01157   } // end if (ETranspChar[transa]=='N') ...
01158       } // end if (ETranspChar[transb]=='N') ...
01159     } // end if (!BadArgument) ...
01160   } // end of GEMM
01161 
01162 
01163   template<typename OrdinalType, typename ScalarType>
01164   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01165   void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01166   {
01167     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01168     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01169     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01170     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01171     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01172     beta_type beta_one = ScalarTraits<beta_type>::one();
01173     OrdinalType i, j, k, NRowA = m;
01174     ScalarType temp1, temp2;
01175     bool BadArgument = false;
01176     bool Upper = (EUploChar[uplo] == 'U');
01177     if (ESideChar[side] == 'R') { NRowA = n; }
01178 
01179     // Quick return.
01180     if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
01181     if( m < izero ) { 
01182       std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
01183       BadArgument = true; }
01184     if( n < izero ) {
01185       std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
01186       BadArgument = true; }
01187     if( lda < NRowA ) {
01188       std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
01189       BadArgument = true; }
01190     if( ldb < m ) {
01191       std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
01192       BadArgument = true; }
01193     if( ldc < m ) {
01194       std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
01195       BadArgument = true; }
01196 
01197     if(!BadArgument) {
01198 
01199       // Only need to scale C and return.
01200       if (alpha == alpha_zero) {
01201   if (beta == beta_zero ) {
01202     for (j=izero; j<n; j++) {
01203       for (i=izero; i<m; i++) {
01204         C[j*ldc + i] = C_zero;
01205       }
01206     }
01207   } else {
01208     for (j=izero; j<n; j++) {
01209       for (i=izero; i<m; i++) {
01210         C[j*ldc + i] *= beta;
01211       }
01212     }
01213   }
01214   return;
01215       }
01216 
01217       if ( ESideChar[side] == 'L') {
01218   // Compute C = alpha*A*B + beta*C
01219 
01220   if (Upper) {
01221     // The symmetric part of A is stored in the upper triangular part of the matrix.
01222     for (j=izero; j<n; j++) {
01223       for (i=izero; i<m; i++) {
01224         temp1 = alpha*B[j*ldb + i];
01225         temp2 = C_zero;
01226         for (k=izero; k<i; k++) {
01227     C[j*ldc + k] += temp1*A[i*lda + k];
01228     temp2 += B[j*ldb + k]*A[i*lda + k];
01229         }
01230         if (beta == beta_zero) {
01231     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01232         } else {
01233     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01234         }
01235       }
01236     }
01237   } else {
01238     // The symmetric part of A is stored in the lower triangular part of the matrix.
01239     for (j=izero; j<n; j++) {
01240       for (i=m-ione; i>-ione; i--) {
01241         temp1 = alpha*B[j*ldb + i];
01242         temp2 = C_zero;
01243         for (k=i+ione; k<m; k++) {
01244     C[j*ldc + k] += temp1*A[i*lda + k];
01245     temp2 += B[j*ldb + k]*A[i*lda + k];
01246         }
01247         if (beta == beta_zero) {
01248     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01249         } else {
01250     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01251         }
01252       }
01253     }
01254   }
01255       } else {
01256   // Compute C = alpha*B*A + beta*C.
01257   for (j=izero; j<n; j++) {
01258     temp1 = alpha*A[j*lda + j];
01259     if (beta == beta_zero) {
01260       for (i=izero; i<m; i++) {
01261         C[j*ldc + i] = temp1*B[j*ldb + i];
01262       }
01263     } else {
01264       for (i=izero; i<m; i++) {
01265         C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01266       }
01267     }
01268     for (k=izero; k<j; k++) {
01269       if (Upper) {
01270         temp1 = alpha*A[j*lda + k];
01271       } else {
01272         temp1 = alpha*A[k*lda + j];
01273       }
01274       for (i=izero; i<m; i++) {
01275         C[j*ldc + i] += temp1*B[k*ldb + i];
01276       }
01277     }
01278     for (k=j+ione; k<n; k++) {
01279       if (Upper) {
01280         temp1 = alpha*A[k*lda + j];
01281       } else {
01282         temp1 = alpha*A[j*lda + k];
01283       }
01284       for (i=izero; i<m; i++) {
01285         C[j*ldc + i] += temp1*B[k*ldb + i];
01286       }
01287     }
01288   }
01289       } // end if (ESideChar[side]=='L') ...
01290     } // end if(!BadArgument) ...
01291 } // end SYMM
01292 
01293   template<typename OrdinalType, typename ScalarType>
01294   template <typename alpha_type, typename A_type, typename beta_type>
01295   void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01296   {
01297     typedef TypeNameTraits<OrdinalType> OTNT;
01298     typedef TypeNameTraits<ScalarType> STNT;
01299 
01300     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01301     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01302     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01303     beta_type beta_one = ScalarTraits<beta_type>::one();
01304     A_type temp, A_zero = ScalarTraits<A_type>::zero();
01305     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01306     OrdinalType i, j, l, NRowA = n;
01307     bool BadArgument = false;
01308     bool Upper = (EUploChar[uplo] == 'U');
01309 
01310     TEST_FOR_EXCEPTION(
01311       Teuchos::ScalarTraits<ScalarType>::isComplex
01312       && (trans == CONJ_TRANS),
01313       std::logic_error,
01314             "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
01315             " does not support CONJ_TRANS for complex data types."
01316       );
01317 
01318     // Change dimensions of A matrix is transposed
01319     if( !(ETranspChar[trans]=='N') ) {
01320       NRowA = k;
01321     }
01322 
01323     // Quick return.
01324     if ( n==izero ) { return; }
01325     if ( ( (alpha==alpha_zero) || (k==izero) )  && (beta==beta_one) ) { return; }
01326     if( n < izero ) {
01327       std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
01328       BadArgument = true; }
01329     if( k < izero ) {
01330       std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
01331       BadArgument = true; }
01332     if( lda < NRowA ) {
01333       std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
01334       BadArgument = true; }
01335     if( ldc < n ) {
01336       std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
01337       BadArgument = true; }
01338 
01339     if(!BadArgument) {
01340 
01341       // Scale C when alpha is zero
01342       if (alpha == alpha_zero) {
01343         if (Upper) {
01344           if (beta==beta_zero) {      
01345             for (j=izero; j<n; j++) {
01346               for (i=izero; i<=j; i++) {
01347           C[j*ldc + i] = C_zero;
01348               }
01349             }
01350           } 
01351           else {
01352             for (j=izero; j<n; j++) {
01353               for (i=izero; i<=j; i++) {
01354                 C[j*ldc + i] *= beta;
01355               }
01356             }
01357           }
01358         }
01359         else {
01360           if (beta==beta_zero) {      
01361             for (j=izero; j<n; j++) {
01362               for (i=j; i<n; i++) {
01363                 C[j*ldc + i] = C_zero;
01364               }
01365             }
01366           } 
01367           else {
01368             for (j=izero; j<n; j++) {
01369               for (i=j; i<n; i++) {
01370                 C[j*ldc + i] *= beta;
01371               }
01372             }
01373           }
01374         }
01375         return;
01376       }
01377   
01378       // Now we can start the computation
01379 
01380       if ( ETranspChar[trans]=='N' ) {
01381   
01382         // Form C <- alpha*A*A' + beta*C
01383         if (Upper) {
01384           for (j=izero; j<n; j++) {
01385             if (beta==beta_zero) {
01386               for (i=izero; i<=j; i++) {
01387                 C[j*ldc + i] = C_zero;
01388               }
01389             }
01390             else if (beta!=beta_one) {
01391               for (i=izero; i<=j; i++) {
01392                 C[j*ldc + i] *= beta;
01393               }
01394             }
01395             for (l=izero; l<k; l++) {
01396               if (A[l*lda + j] != A_zero) {
01397                 temp = alpha*A[l*lda + j];
01398                 for (i = izero; i <=j; i++) {
01399                   C[j*ldc + i] += temp*A[l*lda + i];
01400                 }
01401               }
01402             }
01403           }
01404         }
01405         else {
01406           for (j=izero; j<n; j++) {
01407             if (beta==beta_zero) {
01408               for (i=j; i<n; i++) {
01409                 C[j*ldc + i] = C_zero;
01410               }
01411             }
01412             else if (beta!=beta_one) {
01413               for (i=j; i<n; i++) {
01414                 C[j*ldc + i] *= beta;
01415               }
01416             }
01417             for (l=izero; l<k; l++) {
01418               if (A[l*lda + j] != A_zero) {
01419                 temp = alpha*A[l*lda + j];
01420                 for (i=j; i<n; i++) {
01421                   C[j*ldc + i] += temp*A[l*lda + i];
01422                 }
01423               }
01424             }
01425           }
01426         }
01427       }
01428       else {
01429 
01430         // Form C <- alpha*A'*A + beta*C
01431         if (Upper) {
01432           for (j=izero; j<n; j++) {
01433             for (i=izero; i<=j; i++) {
01434               temp = A_zero;
01435               for (l=izero; l<k; l++) {
01436                 temp += A[i*lda + l]*A[j*lda + l];
01437               }
01438               if (beta==beta_zero) {
01439                 C[j*ldc + i] = alpha*temp;
01440               }
01441               else {
01442                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01443               }
01444             }
01445           }
01446         }
01447         else {
01448           for (j=izero; j<n; j++) {
01449             for (i=j; i<n; i++) {
01450               temp = A_zero;
01451               for (l=izero; l<k; ++l) {
01452                 temp += A[i*lda + l]*A[j*lda + l];
01453               }
01454               if (beta==beta_zero) {
01455                 C[j*ldc + i] = alpha*temp;
01456               }
01457               else {
01458                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01459               }
01460             }  
01461           }
01462         }
01463       }  
01464     } /* if (!BadArgument) */
01465   } /* END SYRK */
01466 
01467   template<typename OrdinalType, typename ScalarType>
01468   template <typename alpha_type, typename A_type>
01469   void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01470   {
01471     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01472     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01473     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01474     A_type A_zero = ScalarTraits<A_type>::zero();
01475     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01476     ScalarType one = ScalarTraits<ScalarType>::one();
01477     OrdinalType i, j, k, NRowA = m;
01478     ScalarType temp;
01479     bool BadArgument = false;
01480     bool LSide = (ESideChar[side] == 'L');
01481     bool noUnit = (EDiagChar[diag] == 'N');
01482     bool Upper = (EUploChar[uplo] == 'U');
01483     bool noConj = (EUploChar[transa] == 'T');
01484 
01485     if(!LSide) { NRowA = n; }
01486 
01487     // Quick return.
01488     if (n==izero || m==izero) { return; }
01489     if( m < izero ) {
01490       std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01491       BadArgument = true; }
01492     if( n < izero ) {
01493       std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01494       BadArgument = true; }
01495     if( lda < NRowA ) {
01496       std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
01497       BadArgument = true; }
01498     if( ldb < m ) {
01499       std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
01500       BadArgument = true; }
01501 
01502     if(!BadArgument) {
01503 
01504       // B only needs to be zeroed out.
01505       if( alpha == alpha_zero ) {
01506   for( j=izero; j<n; j++ ) {
01507     for( i=izero; i<m; i++ ) {
01508       B[j*ldb + i] = B_zero;
01509     }
01510   }
01511   return;
01512       }
01513       
01514       //  Start the computations. 
01515       if ( LSide ) {
01516   // A is on the left side of B.
01517   
01518   if ( ETranspChar[transa]=='N' ) {
01519     // Compute B = alpha*A*B
01520 
01521     if ( Upper ) {
01522       // A is upper triangular
01523       for( j=izero; j<n; j++ ) {
01524         for( k=izero; k<m; k++) {
01525     if ( B[j*ldb + k] != B_zero ) {
01526       temp = alpha*B[j*ldb + k];
01527       for( i=izero; i<k; i++ ) {
01528         B[j*ldb + i] += temp*A[k*lda + i];
01529       }
01530       if ( noUnit )
01531         temp *=A[k*lda + k];
01532       B[j*ldb + k] = temp;
01533     }
01534         }
01535       }
01536     } else {
01537       // A is lower triangular
01538       for( j=izero; j<n; j++ ) {
01539         for( k=m-ione; k>-ione; k-- ) {
01540     if( B[j*ldb + k] != B_zero ) {
01541       temp = alpha*B[j*ldb + k];
01542       B[j*ldb + k] = temp;
01543       if ( noUnit )
01544         B[j*ldb + k] *= A[k*lda + k];
01545       for( i=k+ione; i<m; i++ ) {
01546         B[j*ldb + i] += temp*A[k*lda + i];
01547       }
01548     }
01549         }
01550       }
01551     }
01552   } else {
01553     // Compute B = alpha*A'*B or B = alpha*conj(A')*B
01554     if( Upper ) {
01555       for( j=izero; j<n; j++ ) {
01556         for( i=m-ione; i>-ione; i-- ) {
01557     temp = B[j*ldb + i];
01558                 if ( noConj ) {
01559                   if( noUnit )
01560                     temp *= A[i*lda + i];
01561                   for( k=izero; k<i; k++ ) {
01562                     temp += A[i*lda + k]*B[j*ldb + k];
01563                   }
01564                 } else {
01565                   if( noUnit )
01566                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01567                   for( k=izero; k<i; k++ ) {
01568                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01569                   }
01570                 }
01571     B[j*ldb + i] = alpha*temp;
01572         }
01573       }
01574     } else {
01575       for( j=izero; j<n; j++ ) {
01576         for( i=izero; i<m; i++ ) {
01577     temp = B[j*ldb + i];
01578                 if ( noConj ) {
01579                   if( noUnit ) 
01580                     temp *= A[i*lda + i];
01581                   for( k=i+ione; k<m; k++ ) {
01582                     temp += A[i*lda + k]*B[j*ldb + k];
01583                   }
01584                 } else {
01585                   if( noUnit )
01586                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01587                   for( k=i+ione; k<m; k++ ) {
01588                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01589                   }
01590                 }
01591     B[j*ldb + i] = alpha*temp;
01592         }
01593       }
01594     }
01595   }
01596       } else {
01597   // A is on the right hand side of B.
01598   
01599   if( ETranspChar[transa] == 'N' ) {
01600     // Compute B = alpha*B*A
01601 
01602     if( Upper ) {
01603       // A is upper triangular.
01604       for( j=n-ione; j>-ione; j-- ) {
01605         temp = alpha;
01606         if( noUnit )
01607     temp *= A[j*lda + j];
01608         for( i=izero; i<m; i++ ) {
01609     B[j*ldb + i] *= temp;
01610         }
01611         for( k=izero; k<j; k++ ) {
01612     if( A[j*lda + k] != A_zero ) {
01613       temp = alpha*A[j*lda + k];
01614       for( i=izero; i<m; i++ ) {
01615         B[j*ldb + i] += temp*B[k*ldb + i];
01616       }
01617     }
01618         }
01619       }
01620     } else {
01621       // A is lower triangular.
01622       for( j=izero; j<n; j++ ) {
01623         temp = alpha;
01624         if( noUnit )
01625     temp *= A[j*lda + j];
01626         for( i=izero; i<m; i++ ) {
01627     B[j*ldb + i] *= temp;
01628         }
01629         for( k=j+ione; k<n; k++ ) {
01630     if( A[j*lda + k] != A_zero ) {
01631       temp = alpha*A[j*lda + k];
01632       for( i=izero; i<m; i++ ) {
01633         B[j*ldb + i] += temp*B[k*ldb + i];
01634       }
01635     }
01636         }
01637       }
01638     }
01639   } else {
01640     // Compute B = alpha*B*A' or B = alpha*B*conj(A')
01641 
01642     if( Upper ) {
01643       for( k=izero; k<n; k++ ) {
01644         for( j=izero; j<k; j++ ) {
01645     if( A[k*lda + j] != A_zero ) {
01646                   if ( noConj )
01647         temp = alpha*A[k*lda + j];
01648                   else 
01649         temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01650       for( i=izero; i<m; i++ ) {
01651         B[j*ldb + i] += temp*B[k*ldb + i];
01652       }
01653     }
01654         }
01655         temp = alpha;
01656         if( noUnit ) {
01657                 if ( noConj ) 
01658       temp *= A[k*lda + k];
01659                 else 
01660                   temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01661               }
01662         if( temp != one ) {
01663     for( i=izero; i<m; i++ ) {
01664       B[k*ldb + i] *= temp;
01665     }
01666         }
01667       }
01668     } else {
01669       for( k=n-ione; k>-ione; k-- ) {
01670         for( j=k+ione; j<n; j++ ) {
01671     if( A[k*lda + j] != A_zero ) {
01672                   if ( noConj )
01673                     temp = alpha*A[k*lda + j];
01674                   else
01675         temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01676       for( i=izero; i<m; i++ ) {
01677         B[j*ldb + i] += temp*B[k*ldb + i];
01678       }
01679     }
01680         }
01681         temp = alpha;
01682         if( noUnit ) {
01683     if ( noConj ) 
01684                   temp *= A[k*lda + k];
01685                 else
01686       temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01687               }
01688         if( temp != one ) {
01689     for( i=izero; i<m; i++ ) {
01690       B[k*ldb + i] *= temp;
01691     }
01692         }
01693       }
01694     }
01695   } // end if( ETranspChar[transa] == 'N' ) ...
01696       } // end if ( LSide ) ...
01697     } // end if (!BadArgument)
01698   } // end TRMM
01699   
01700   template<typename OrdinalType, typename ScalarType>
01701   template <typename alpha_type, typename A_type>
01702   void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01703   {
01704     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01705     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01706     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01707     A_type A_zero = ScalarTraits<A_type>::zero();
01708     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01709     alpha_type alpha_one = ScalarTraits<alpha_type>::one();
01710     ScalarType B_one = ScalarTraits<ScalarType>::one();
01711     ScalarType temp;
01712     OrdinalType NRowA = m;
01713     bool BadArgument = false;
01714     bool noUnit = (EDiagChar[diag]=='N');
01715     bool noConj = (EUploChar[transa] == 'T');
01716     
01717     if (!(ESideChar[side] == 'L')) { NRowA = n; }
01718 
01719     // Quick return.
01720     if (n == izero || m == izero) { return; }
01721     if( m < izero ) {
01722       std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01723       BadArgument = true; }
01724     if( n < izero ) {
01725       std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01726       BadArgument = true; }
01727     if( lda < NRowA ) {
01728       std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
01729       BadArgument = true; }
01730     if( ldb < m ) {
01731       std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
01732       BadArgument = true; }
01733 
01734     if(!BadArgument)
01735       {
01736   int i, j, k;
01737   // Set the solution to the zero std::vector.
01738   if(alpha == alpha_zero) {
01739       for(j = izero; j < n; j++) {
01740         for( i = izero; i < m; i++) {
01741         B[j*ldb+i] = B_zero;
01742           }
01743       }
01744   }
01745   else 
01746   { // Start the operations.
01747       if(ESideChar[side] == 'L') {
01748     //
01749         // Perform computations for OP(A)*X = alpha*B     
01750     //
01751     if(ETranspChar[transa] == 'N') {
01752         //
01753         //  Compute B = alpha*inv( A )*B
01754         //
01755         if(EUploChar[uplo] == 'U') { 
01756       // A is upper triangular.
01757       for(j = izero; j < n; j++) {
01758               // Perform alpha*B if alpha is not 1.
01759         if(alpha != alpha_one) {
01760                 for( i = izero; i < m; i++) {
01761                 B[j*ldb+i] *= alpha;
01762             }
01763           }
01764           // Perform a backsolve for column j of B.
01765           for(k = (m - ione); k > -ione; k--) {
01766         // If this entry is zero, we don't have to do anything.
01767         if (B[j*ldb + k] != B_zero) {
01768             if ( noUnit ) {
01769           B[j*ldb + k] /= A[k*lda + k];
01770             }
01771             for(i = izero; i < k; i++) {
01772           B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01773             }
01774         }
01775           }
01776       }
01777         }
01778         else 
01779         { // A is lower triangular.
01780                         for(j = izero; j < n; j++) {
01781                             // Perform alpha*B if alpha is not 1.
01782                             if(alpha != alpha_one) {
01783                                 for( i = izero; i < m; i++) {
01784                                     B[j*ldb+i] *= alpha;
01785                                 }
01786                             }
01787                             // Perform a forward solve for column j of B.
01788                             for(k = izero; k < m; k++) {
01789                                 // If this entry is zero, we don't have to do anything.
01790                                 if (B[j*ldb + k] != B_zero) {   
01791                                     if ( noUnit ) {
01792                                         B[j*ldb + k] /= A[k*lda + k];
01793                                     }
01794                                     for(i = k+ione; i < m; i++) {
01795                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01796                                     }
01797                                 }
01798                             }
01799                         }
01800         } // end if (uplo == 'U')
01801     }  // if (transa =='N') 
01802         else { 
01803         //
01804         //  Compute B = alpha*inv( A' )*B 
01805                     //  or      B = alpha*inv( conj(A') )*B
01806         //
01807         if(EUploChar[uplo] == 'U') { 
01808       // A is upper triangular.
01809       for(j = izero; j < n; j++) {
01810                   for( i = izero; i < m; i++) {
01811             temp = alpha*B[j*ldb+i];
01812                                 if ( noConj ) {
01813                                   for(k = izero; k < i; k++) {
01814               temp -= A[i*lda + k] * B[j*ldb + k];
01815           }
01816           if ( noUnit ) {
01817               temp /= A[i*lda + i];
01818           }
01819                                 } else {
01820                                   for(k = izero; k < i; k++) {
01821                                       temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k]) 
01822                                             * B[j*ldb + k];
01823                                   }
01824                                   if ( noUnit ) {
01825                                       temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01826                                   }
01827                                 }
01828         B[j*ldb + i] = temp;
01829           }
01830       }
01831         }
01832         else
01833         { // A is lower triangular.
01834                         for(j = izero; j < n; j++) {
01835                             for(i = (m - ione) ; i > -ione; i--) {
01836                                 temp = alpha*B[j*ldb+i];
01837                                 if ( noConj ) {
01838                                 for(k = i+ione; k < m; k++) {
01839             temp -= A[i*lda + k] * B[j*ldb + k];
01840           }
01841           if ( noUnit ) {
01842             temp /= A[i*lda + i];
01843           }
01844                                 } else {
01845                                   for(k = i+ione; k < m; k++) {
01846                                     temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k]) 
01847                                           * B[j*ldb + k];
01848                                   }
01849                                   if ( noUnit ) {
01850                                     temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01851                                   }
01852                                 }
01853         B[j*ldb + i] = temp;
01854                             }
01855                         }
01856         }
01857     }
01858       }  // if (side == 'L')
01859       else { 
01860          // side == 'R'
01861          //
01862          // Perform computations for X*OP(A) = alpha*B      
01863          //
01864         if (ETranspChar[transa] == 'N') {
01865         //
01866         //  Compute B = alpha*B*inv( A )
01867         //
01868         if(EUploChar[uplo] == 'U') { 
01869       // A is upper triangular.
01870           // Perform a backsolve for column j of B.
01871       for(j = izero; j < n; j++) {
01872               // Perform alpha*B if alpha is not 1.
01873               if(alpha != alpha_one) {
01874                 for( i = izero; i < m; i++) {
01875                 B[j*ldb+i] *= alpha;
01876             }
01877           }
01878           for(k = izero; k < j; k++) {
01879         // If this entry is zero, we don't have to do anything.
01880         if (A[j*lda + k] != A_zero) {
01881             for(i = izero; i < m; i++) {
01882           B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01883             }
01884         }
01885           }
01886           if ( noUnit ) {
01887         temp = B_one/A[j*lda + j];
01888         for(i = izero; i < m; i++) {
01889             B[j*ldb + i] *= temp;
01890         }
01891           }
01892       }
01893         }
01894         else 
01895         { // A is lower triangular.
01896                         for(j = (n - ione); j > -ione; j--) {
01897                             // Perform alpha*B if alpha is not 1.
01898                             if(alpha != alpha_one) {
01899                                 for( i = izero; i < m; i++) {
01900                                     B[j*ldb+i] *= alpha;
01901                                 }
01902                             }
01903                             // Perform a forward solve for column j of B.
01904                             for(k = j+ione; k < n; k++) {
01905                                 // If this entry is zero, we don't have to do anything.
01906         if (A[j*lda + k] != A_zero) {
01907             for(i = izero; i < m; i++) {
01908                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 
01909                                     }
01910                                 } 
01911                             }
01912           if ( noUnit ) {
01913         temp = B_one/A[j*lda + j];
01914         for(i = izero; i < m; i++) {
01915             B[j*ldb + i] *= temp;
01916         }
01917           }     
01918                         }
01919         } // end if (uplo == 'U')
01920     }  // if (transa =='N') 
01921         else { 
01922         //
01923         //  Compute B = alpha*B*inv( A' )
01924                     //  or      B = alpha*B*inv( conj(A') )
01925         //
01926         if(EUploChar[uplo] == 'U') { 
01927       // A is upper triangular.
01928       for(k = (n - ione); k > -ione; k--) {
01929           if ( noUnit ) {
01930                                 if ( noConj )
01931           temp = B_one/A[k*lda + k];
01932                                 else
01933           temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01934                     for(i = izero; i < m; i++) {
01935                 B[k*ldb + i] *= temp;
01936         }
01937           }
01938           for(j = izero; j < k; j++) {
01939         if (A[k*lda + j] != A_zero) {
01940                                     if ( noConj ) 
01941               temp = A[k*lda + j];
01942                                     else
01943               temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01944             for(i = izero; i < m; i++) {
01945           B[j*ldb + i] -= temp*B[k*ldb + i];
01946             }
01947         }
01948           }
01949           if (alpha != alpha_one) {
01950         for (i = izero; i < m; i++) {
01951             B[k*ldb + i] *= alpha;
01952         }
01953           }
01954       }
01955         }
01956         else
01957         { // A is lower triangular.
01958       for(k = izero; k < n; k++) {
01959           if ( noUnit ) {
01960                                 if ( noConj )
01961           temp = B_one/A[k*lda + k];
01962                                 else
01963           temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01964         for (i = izero; i < m; i++) {
01965             B[k*ldb + i] *= temp;
01966         }
01967           }
01968           for(j = k+ione; j < n; j++) {
01969         if(A[k*lda + j] != A_zero) {
01970                                     if ( noConj )
01971               temp = A[k*lda + j];
01972                                     else
01973               temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01974             for(i = izero; i < m; i++) {
01975           B[j*ldb + i] -= temp*B[k*ldb + i];
01976             }
01977         }
01978           }
01979           if (alpha != alpha_one) {
01980         for (i = izero; i < m; i++) {
01981             B[k*ldb + i] *= alpha;
01982         }
01983           }
01984                         }
01985         }
01986     }   
01987       }
01988   }
01989     }
01990   }
01991 
01992   // Explicit instantiation for template<int,float>
01993 
01994   template <>
01995   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, float>
01996   {    
01997   public:
01998     inline BLAS(void) {}
01999     inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
02000     inline virtual ~BLAS(void) {}
02001     void ROTG(float* da, float* db, float* c, float* s) const;
02002     void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
02003     float ASUM(const int n, const float* x, const int incx) const;
02004     void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
02005     void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
02006     float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
02007     float NRM2(const int n, const float* x, const int incx) const;
02008     void SCAL(const int n, const float alpha, float* x, const int incx) const;
02009     int IAMAX(const int n, const float* x, const int incx) const;
02010     void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
02011     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
02012     void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
02013     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
02014     void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
02015     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const;
02016     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02017     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02018   };
02019 
02020   // Explicit instantiation for template<int,double>
02021 
02022   template<>
02023   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, double>
02024   {    
02025   public:
02026     inline BLAS(void) {}
02027     inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
02028     inline virtual ~BLAS(void) {}
02029     void ROTG(double* da, double* db, double* c, double* s) const;
02030     void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
02031     double ASUM(const int n, const double* x, const int incx) const;
02032     void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
02033     void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
02034     double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
02035     double NRM2(const int n, const double* x, const int incx) const;
02036     void SCAL(const int n, const double alpha, double* x, const int incx) const;
02037     int IAMAX(const int n, const double* x, const int incx) const;
02038     void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
02039     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
02040     void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
02041     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
02042     void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
02043     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const;
02044     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02045     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02046   };
02047 
02048   // Explicit instantiation for template<int,complex<float> >
02049 
02050   template<>
02051   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
02052   {    
02053   public:
02054     inline BLAS(void) {}
02055     inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
02056     inline virtual ~BLAS(void) {}
02057     void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
02058     void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
02059     float ASUM(const int n, const std::complex<float>* x, const int incx) const;
02060     void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02061     void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02062     std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
02063     float NRM2(const int n, const std::complex<float>* x, const int incx) const;
02064     void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
02065     int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
02066     void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
02067     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
02068     void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
02069     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02070     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
02071     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02072     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02073     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02074   };
02075 
02076   // Explicit instantiation for template<int,complex<double> >
02077 
02078   template<>
02079   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
02080   {    
02081   public:
02082     inline BLAS(void) {}
02083     inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
02084     inline virtual ~BLAS(void) {}
02085     void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
02086     void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
02087     double ASUM(const int n, const std::complex<double>* x, const int incx) const;
02088     void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02089     void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02090     std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
02091     double NRM2(const int n, const std::complex<double>* x, const int incx) const;
02092     void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
02093     int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
02094     void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
02095     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
02096     void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
02097     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02098     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
02099     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02100     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02101     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02102   };
02103 
02104 } // namespace Teuchos
02105 
02106 #endif // _TEUCHOS_BLAS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines