Teuchos - Trilinos Tools Package Version of the Day
Teuchos_BLAS.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 // Kris
00043 // 06.16.03 -- Start over from scratch
00044 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
00045 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
00046 //          -- Added warning messages for generic calls
00047 // 07.08.03 -- Move into Teuchos package/namespace
00048 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
00049 //             * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
00050 //             * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
00051 //             * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
00052 //             * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
00053 //             * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
00054 //          -- Removed warning messages for generic calls
00055 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
00056 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
00057 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
00058 
00059 #ifndef _TEUCHOS_BLAS_HPP_
00060 #define _TEUCHOS_BLAS_HPP_
00061 
00069 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00070 the appropriate declaration of one will need to be added back into
00071 functions that include the macro:
00072 */
00073 #if defined (INTEL_CXML)
00074         unsigned int one=1;
00075 #endif
00076 
00077 #ifdef CHAR_MACRO
00078 #undef CHAR_MACRO
00079 #endif
00080 #if defined (INTEL_CXML)
00081 #define CHAR_MACRO(char_var) &char_var, one
00082 #else
00083 #define CHAR_MACRO(char_var) &char_var
00084 #endif
00085 
00086 #include "Teuchos_ConfigDefs.hpp"
00087 #include "Teuchos_ScalarTraits.hpp"
00088 #include "Teuchos_OrdinalTraits.hpp"
00089 #include "Teuchos_BLAS_types.hpp"
00090 #include "Teuchos_Assert.hpp"
00091 
00124 namespace Teuchos
00125 {
00126   extern TEUCHOS_LIB_DLL_EXPORT const char ESideChar[];
00127   extern TEUCHOS_LIB_DLL_EXPORT const char ETranspChar[];
00128   extern TEUCHOS_LIB_DLL_EXPORT const char EUploChar[];
00129   extern TEUCHOS_LIB_DLL_EXPORT const char EDiagChar[];
00130 
00132 
00137   template<typename OrdinalType, typename ScalarType>
00138   class DefaultBLASImpl
00139   {
00140 
00141     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00142 
00143   public:
00145 
00146 
00148     inline DefaultBLASImpl(void) {}
00149 
00151     inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00152 
00154     inline virtual ~DefaultBLASImpl(void) {}
00156 
00158 
00159 
00161     void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00162 
00164     void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00165 
00167     void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00168 
00170     void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00171 
00173     template <typename alpha_type, typename x_type>
00174     void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00175 
00177     typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00178 
00180     template <typename x_type, typename y_type>
00181     ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
00182 
00184     typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00185 
00187     OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00189 
00191 
00192 
00194     template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00195     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
00196               const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
00197 
00199     template <typename A_type>
00200     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
00201               const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00202 
00205     template <typename alpha_type, typename x_type, typename y_type>
00206     void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
00207              const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00209 
00211 
00212 
00219     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00220     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00221 
00223     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00224     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00225 
00227     template <typename alpha_type, typename A_type, typename beta_type>
00228     void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00229 
00231     template <typename alpha_type, typename A_type>
00232     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00233                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00234 
00236     template <typename alpha_type, typename A_type>
00237     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00238                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00240   };
00241 
00242   template<typename OrdinalType, typename ScalarType>
00243   class TEUCHOS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
00244   {
00245 
00246     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00247 
00248   public:
00250 
00251 
00253     inline BLAS(void) {}
00254 
00256     inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00257 
00259     inline virtual ~BLAS(void) {}
00261   };
00262 
00263 //------------------------------------------------------------------------------------------
00264 //      LEVEL 1 BLAS ROUTINES
00265 //------------------------------------------------------------------------------------------
00266 
00274   namespace details {
00275 
00276     // Compute magnitude.
00277     template<typename ScalarType, bool isComplex>
00278     class MagValue {
00279     public:
00280       void
00281       blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 
00282     };
00283 
00284     // Complex-arithmetic specialization. 
00285     template<typename ScalarType>
00286     class MagValue<ScalarType, true> {
00287     public:
00288       void
00289       blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 
00290     };
00291 
00292     // Real-arithmetic specialization.
00293     template<typename ScalarType>
00294     class MagValue<ScalarType, false> {
00295     public:
00296       void
00297       blas_dabs1(const ScalarType* a, ScalarType* ret) const;
00298     };
00299  
00300     template<typename ScalarType, bool isComplex>
00301     class GivensRotator {
00302     public:
00303       void
00304       ROTG (ScalarType* a,
00305             ScalarType* b,
00306             typename ScalarTraits<ScalarType>::magnitudeType* c,
00307             ScalarType* s) const;
00308     };
00309 
00310     // Complex-arithmetic specialization.
00311     template<typename ScalarType>
00312     class GivensRotator<ScalarType, true> {
00313     public:
00314       void
00315       ROTG (ScalarType* ca,
00316             ScalarType* cb,
00317             typename ScalarTraits<ScalarType>::magnitudeType* c,
00318             ScalarType* s) const;
00319     };
00320 
00321     // Real-arithmetic specialization.
00322     template<typename ScalarType>
00323     class GivensRotator<ScalarType, false> {
00324     public:
00325       void
00326       ROTG (ScalarType* da,
00327             ScalarType* db,
00328             ScalarType* c,
00329             ScalarType* s) const;
00330     private:
00343       ScalarType SIGN (ScalarType x, ScalarType y) const {
00344         typedef ScalarTraits<ScalarType> STS;
00345 
00346         if (y > STS::zero()) {
00347           return STS::magnitude (x);
00348         } else if (y < STS::zero()) {
00349           return -STS::magnitude (x);
00350         } else { // y == STS::zero()
00351           // Suppose that ScalarType implements signed zero, as IEEE
00352           // 754 - compliant floating-point numbers should.  You can't
00353           // use == to test for signed zero, since +0 == -0.  However,
00354           // 1/0 = Inf > 0 and 1/-0 = -Inf < 0.  Let's hope ScalarType
00355           // supports Inf... we don't need to test for Inf, just see
00356           // if it's greater than or less than zero.
00357           //
00358           // NOTE: This ONLY works if ScalarType is real.  Complex
00359           // infinity doesn't have a sign, so we can't compare it with
00360           // zero.  That's OK, because finite complex numbers don't
00361           // have a sign either; they have an angle.
00362           ScalarType signedInfinity = STS::one() / y;
00363           if (signedInfinity > STS::zero()) {
00364             return STS::magnitude (x);
00365           } else {
00366             // Even if ScalarType doesn't implement signed zero,
00367             // Fortran's SIGN intrinsic returns -ABS(X) if the second
00368             // argument Y is zero.  We imitate this behavior here.
00369             return -STS::magnitude (x);
00370           }
00371         }
00372       }
00373     };
00374 
00375     // Implementation of complex-arithmetic specialization.
00376     template<typename ScalarType>
00377     void
00378     GivensRotator<ScalarType, true>::
00379     ROTG (ScalarType* ca,
00380           ScalarType* cb,
00381           typename ScalarTraits<ScalarType>::magnitudeType* c,
00382           ScalarType* s) const
00383     {
00384       typedef ScalarTraits<ScalarType> STS;
00385       typedef typename STS::magnitudeType MagnitudeType;
00386       typedef ScalarTraits<MagnitudeType> STM;
00387 
00388       // This is a straightforward translation into C++ of the
00389       // reference BLAS' implementation of ZROTG.  You can get
00390       // the Fortran 77 source code of ZROTG here:
00391       //
00392       // http://www.netlib.org/blas/zrotg.f
00393       //
00394       // I used the following rules to translate Fortran types and
00395       // intrinsic functions into C++:
00396       //
00397       // DOUBLE PRECISION -> MagnitudeType
00398       // DOUBLE COMPLEX -> ScalarType
00399       // CDABS -> STS::magnitude
00400       // DCMPLX -> ScalarType constructor (assuming that ScalarType
00401       //   is std::complex<MagnitudeType>)
00402       // DCONJG -> STS::conjugate
00403       // DSQRT -> STM::squareroot
00404       ScalarType alpha;
00405       MagnitudeType norm, scale;
00406 
00407       if (STS::magnitude (*ca) == STM::zero()) {
00408         *c = STM::zero();
00409         *s = STS::one();
00410         *ca = *cb;
00411       } else {
00412         scale = STS::magnitude (*ca) + STS::magnitude (*cb);
00413         { // I introduced temporaries into the translated BLAS code in
00414           // order to make the expression easier to read and also save a
00415           // few floating-point operations.
00416           const MagnitudeType ca_scaled =
00417             STS::magnitude (*ca / ScalarType(scale, STM::zero()));
00418           const MagnitudeType cb_scaled =
00419             STS::magnitude (*cb / ScalarType(scale, STM::zero()));
00420           norm = scale *
00421             STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
00422         }
00423         alpha = *ca / STS::magnitude (*ca);
00424         *c = STS::magnitude (*ca) / norm;
00425         *s = alpha * STS::conjugate (*cb) / norm;
00426         *ca = alpha * norm;
00427       }
00428     }
00429 
00430     // Implementation of real-arithmetic specialization.
00431     template<typename ScalarType>
00432     void
00433     GivensRotator<ScalarType, false>::
00434     ROTG (ScalarType* da,
00435           ScalarType* db,
00436           ScalarType* c,
00437           ScalarType* s) const
00438     {
00439       typedef ScalarTraits<ScalarType> STS;
00440 
00441       // This is a straightforward translation into C++ of the
00442       // reference BLAS' implementation of DROTG.  You can get
00443       // the Fortran 77 source code of DROTG here:
00444       //
00445       // http://www.netlib.org/blas/drotg.f
00446       //
00447       // I used the following rules to translate Fortran types and
00448       // intrinsic functions into C++:
00449       //
00450       // DOUBLE PRECISION -> ScalarType
00451       // DABS -> STS::magnitude
00452       // DSQRT -> STM::squareroot
00453       // DSIGN -> SIGN (see below)
00454       //
00455       // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of
00456       // the Fortran type-generic SIGN intrinsic) required special
00457       // translation, which we did in a separate utility function in
00458       // the specializaton of GivensRotator for real arithmetic.
00459       // (ROTG for complex arithmetic doesn't require this function.)
00460       // C99 provides a copysign() math library function, but we are
00461       // not able to rely on the existence of C99 functions here.
00462       ScalarType r, roe, scale, z;
00463 
00464       roe = *db;
00465       if (STS::magnitude (*da) > STS::magnitude (*db)) {
00466         roe = *da;
00467       }
00468       scale = STS::magnitude (*da) + STS::magnitude (*db);
00469       if (scale == STS::zero()) {
00470         *c = STS::one();
00471         *s = STS::zero();
00472         r = STS::zero();
00473         z = STS::zero();
00474       } else {
00475         // I introduced temporaries into the translated BLAS code in
00476         // order to make the expression easier to read and also save
00477         // a few floating-point operations.
00478         const ScalarType da_scaled = *da / scale;
00479         const ScalarType db_scaled = *db / scale;
00480         r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
00481         r = SIGN (STS::one(), roe) * r;
00482         *c = *da / r;
00483         *s = *db / r;
00484         z = STS::one();
00485         if (STS::magnitude (*da) > STS::magnitude (*db)) {
00486           z = *s;
00487         }
00488         if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
00489           z = STS::one() / *c;
00490         }
00491       }
00492 
00493       *da = r;
00494       *db = z;
00495     }
00496 
00497     // Real-valued implementation of MagValue
00498     template<typename ScalarType>
00499     void 
00500     MagValue<ScalarType, false>::
00501     blas_dabs1(const ScalarType* a, ScalarType* ret) const 
00502     {
00503       *ret = Teuchos::ScalarTraits<ScalarType>::magnitude( *a );  
00504     }
00505 
00506     // Complex-valued implementation of MagValue
00507     template<typename ScalarType>
00508     void 
00509     MagValue<ScalarType, true>::
00510     blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const 
00511     {
00512       *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real());
00513       *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag());
00514     }
00515 
00516   } // namespace details
00517 
00518   template<typename OrdinalType, typename ScalarType>
00519   void
00520   DefaultBLASImpl<OrdinalType, ScalarType>::
00521   ROTG (ScalarType* da,
00522         ScalarType* db,
00523         MagnitudeType* c,
00524         ScalarType* s) const
00525   {
00526     typedef ScalarTraits<ScalarType> STS;
00527     details::GivensRotator<ScalarType, STS::isComplex> rotator;
00528     rotator.ROTG (da, db, c, s);
00529   }
00530 
00531   template<typename OrdinalType, typename ScalarType>
00532   void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00533   {
00534     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00535     ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
00536     if (n <= 0) return;
00537     if (incx==1 && incy==1) {
00538       for(OrdinalType i=0; i<n; ++i) {
00539         ScalarType temp = *c*dx[i] + sconj*dy[i];
00540         dy[i] = *c*dy[i] - sconj*dx[i];
00541         dx[i] = temp;
00542       }
00543     }
00544     else {
00545       OrdinalType ix = 0, iy = 0;
00546       if (incx < izero) ix = (-n+1)*incx;
00547       if (incy < izero) iy = (-n+1)*incy;
00548       for(OrdinalType i=0; i<n; ++i) {
00549         ScalarType temp = *c*dx[ix] + sconj*dy[iy];
00550         dy[iy] = *c*dy[iy] - sconj*dx[ix];
00551         dx[ix] = temp;
00552         ix += incx;
00553         iy += incy;
00554       }
00555     }
00556   }
00557 
00558   template<typename OrdinalType, typename ScalarType>
00559   void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00560   {
00561     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00562     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00563     OrdinalType i, ix = izero;
00564 
00565     if ( n < ione || incx < ione )
00566       return;
00567 
00568     // Scale the vector.
00569     for(i = izero; i < n; i++)
00570     {
00571       x[ix] *= alpha;
00572       ix += incx;
00573     }
00574   } /* end SCAL */
00575 
00576   template<typename OrdinalType, typename ScalarType>
00577   void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00578   {
00579     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00580     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00581     OrdinalType i, ix = izero, iy = izero;
00582     if ( n > izero ) {
00583         // Set the initial indices (ix, iy).
00584         if (incx < izero) { ix = (-n+ione)*incx; }
00585         if (incy < izero) { iy = (-n+ione)*incy; }
00586 
00587         for(i = izero; i < n; i++)
00588           {
00589             y[iy] = x[ix];
00590             ix += incx;
00591             iy += incy;
00592           }
00593       }
00594   } /* end COPY */
00595 
00596   template<typename OrdinalType, typename ScalarType>
00597   template <typename alpha_type, typename x_type>
00598   void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00599   {
00600     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00601     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00602     OrdinalType i, ix = izero, iy = izero;
00603     if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
00604       {
00605         // Set the initial indices (ix, iy).
00606         if (incx < izero) { ix = (-n+ione)*incx; }
00607         if (incy < izero) { iy = (-n+ione)*incy; }
00608 
00609         for(i = izero; i < n; i++)
00610           {
00611             y[iy] += alpha * x[ix];
00612             ix += incx;
00613             iy += incy;
00614           }
00615       }
00616   } /* end AXPY */
00617 
00618   template<typename OrdinalType, typename ScalarType>
00619   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00620   {
00621     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00622     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00623     typename ScalarTraits<ScalarType>::magnitudeType temp, result =
00624       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00625     OrdinalType i, ix = izero;
00626 
00627     if ( n < ione || incx < ione )
00628       return result;
00629 
00630     details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
00631     for (i = izero; i < n; i++)
00632       {
00633         mval.blas_dabs1( &x[ix], &temp );
00634         result += temp;
00635         ix += incx;
00636       }
00637    
00638     return result;
00639   } /* end ASUM */
00640 
00641   template<typename OrdinalType, typename ScalarType>
00642   template <typename x_type, typename y_type>
00643   ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
00644   {
00645     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00646     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00647     ScalarType result = ScalarTraits<ScalarType>::zero();
00648     OrdinalType i, ix = izero, iy = izero;
00649     if( n > izero )
00650       {
00651         // Set the initial indices (ix,iy).
00652         if (incx < izero) { ix = (-n+ione)*incx; }
00653         if (incy < izero) { iy = (-n+ione)*incy; }
00654 
00655         for(i = izero; i < n; i++)
00656           {
00657             result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
00658             ix += incx;
00659             iy += incy;
00660           }
00661       }
00662     return result;
00663   } /* end DOT */
00664 
00665   template<typename OrdinalType, typename ScalarType>
00666   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00667   {
00668     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00669     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00670     typename ScalarTraits<ScalarType>::magnitudeType result =
00671       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00672     OrdinalType i, ix = izero;
00673 
00674     if ( n < ione || incx < ione )
00675       return result;
00676 
00677     for(i = izero; i < n; i++)
00678       {
00679         result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]);
00680         ix += incx;
00681       }
00682     result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result);
00683     return result;
00684   } /* end NRM2 */
00685 
00686   template<typename OrdinalType, typename ScalarType>
00687   OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00688   {
00689     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00690     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00691     OrdinalType result = izero, ix = izero, i;
00692     typename ScalarTraits<ScalarType>::magnitudeType curval =
00693       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00694     typename ScalarTraits<ScalarType>::magnitudeType maxval =
00695       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00696 
00697     if ( n < ione || incx < ione )
00698       return result;
00699   
00700     details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
00701 
00702     mval.blas_dabs1( &x[ix], &maxval );
00703     ix += incx;
00704     for(i = ione; i < n; i++)
00705       {
00706         mval.blas_dabs1( &x[ix], &curval );
00707         if(curval > maxval)
00708           {
00709             result = i;
00710             maxval = curval;
00711           }
00712         ix += incx;
00713       }
00714 
00715     return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
00716   } /* end IAMAX */
00717 
00718 //------------------------------------------------------------------------------------------
00719 //      LEVEL 2 BLAS ROUTINES
00720 //------------------------------------------------------------------------------------------
00721   template<typename OrdinalType, typename ScalarType>
00722   template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00723   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
00724   {
00725     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00726     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00727     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00728     beta_type beta_zero = ScalarTraits<beta_type>::zero();
00729     x_type x_zero = ScalarTraits<x_type>::zero();
00730     ScalarType y_zero = ScalarTraits<ScalarType>::zero();
00731     beta_type beta_one = ScalarTraits<beta_type>::one();
00732     bool noConj = true;
00733     bool BadArgument = false;
00734 
00735     // Quick return if there is nothing to do!
00736     if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
00737 
00738     // Otherwise, we need to check the argument list.
00739     if( m < izero ) {
00740         std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
00741         BadArgument = true;
00742     }
00743     if( n < izero ) {
00744         std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
00745         BadArgument = true;
00746     }
00747     if( lda < m ) {
00748         std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
00749         BadArgument = true;
00750     }
00751     if( incx == izero ) {
00752         std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00753         BadArgument = true;
00754     }
00755     if( incy == izero ) {
00756         std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00757         BadArgument = true;
00758     }
00759 
00760     if(!BadArgument) {
00761       OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
00762       OrdinalType kx = izero, ky = izero;
00763       ScalarType temp;
00764 
00765       // Determine the lengths of the vectors x and y.
00766       if(ETranspChar[trans] == 'N') {
00767         lenx = n;
00768         leny = m;
00769       } else {
00770         lenx = m;
00771         leny = n;
00772       }
00773 
00774       // Determine if this is a conjugate tranpose
00775       noConj = (ETranspChar[trans] == 'T');
00776 
00777       // Set the starting pointers for the vectors x and y if incx/y < 0.
00778       if (incx < izero ) { kx =  (ione - lenx)*incx; }
00779       if (incy < izero ) { ky =  (ione - leny)*incy; }
00780 
00781       // Form y = beta*y
00782       ix = kx; iy = ky;
00783       if(beta != beta_one) {
00784         if (incy == ione) {
00785           if (beta == beta_zero) {
00786             for(i = izero; i < leny; i++) { y[i] = y_zero; }
00787           } else {
00788             for(i = izero; i < leny; i++) { y[i] *= beta; }
00789           }
00790         } else {
00791           if (beta == beta_zero) {
00792             for(i = izero; i < leny; i++) {
00793               y[iy] = y_zero;
00794               iy += incy;
00795             }
00796           } else {
00797             for(i = izero; i < leny; i++) {
00798               y[iy] *= beta;
00799               iy += incy;
00800             }
00801           }
00802         }
00803       }
00804 
00805       // Return if we don't have to do anything more.
00806       if(alpha == alpha_zero) { return; }
00807 
00808       if( ETranspChar[trans] == 'N' ) {
00809         // Form y = alpha*A*y
00810         jx = kx;
00811         if (incy == ione) {
00812           for(j = izero; j < n; j++) {
00813             if (x[jx] != x_zero) {
00814               temp = alpha*x[jx];
00815               for(i = izero; i < m; i++) {
00816                 y[i] += temp*A[j*lda + i];
00817               }
00818             }
00819             jx += incx;
00820           }
00821         } else {
00822           for(j = izero; j < n; j++) {
00823             if (x[jx] != x_zero) {
00824               temp = alpha*x[jx];
00825               iy = ky;
00826               for(i = izero; i < m; i++) {
00827                 y[iy] += temp*A[j*lda + i];
00828                 iy += incy;
00829               }
00830             }
00831             jx += incx;
00832           }
00833         }
00834       } else {
00835         jy = ky;
00836         if (incx == ione) {
00837           for(j = izero; j < n; j++) {
00838             temp = y_zero;
00839             if ( noConj ) {
00840               for(i = izero; i < m; i++) {
00841                 temp += A[j*lda + i]*x[i];
00842               }
00843             } else {
00844               for(i = izero; i < m; i++) {
00845                 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00846               }
00847             }
00848             y[jy] += alpha*temp;
00849             jy += incy;
00850           }
00851         } else {
00852           for(j = izero; j < n; j++) {
00853             temp = y_zero;
00854             ix = kx;
00855             if ( noConj ) {
00856               for (i = izero; i < m; i++) {
00857                 temp += A[j*lda + i]*x[ix];
00858                 ix += incx;
00859               }
00860             } else {
00861               for (i = izero; i < m; i++) {
00862                 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00863                 ix += incx;
00864               }
00865             }
00866             y[jy] += alpha*temp;
00867             jy += incy;
00868           }
00869         }
00870       }
00871     } /* if (!BadArgument) */
00872   } /* end GEMV */
00873 
00874  template<typename OrdinalType, typename ScalarType>
00875  template <typename A_type>
00876  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00877   {
00878     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00879     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00880     ScalarType zero = ScalarTraits<ScalarType>::zero();
00881     bool BadArgument = false;
00882     bool noConj = true;
00883 
00884     // Quick return if there is nothing to do!
00885     if( n == izero ){ return; }
00886 
00887     // Otherwise, we need to check the argument list.
00888     if( n < izero ) {
00889       std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
00890       BadArgument = true;
00891     }
00892     if( lda < n ) {
00893       std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
00894       BadArgument = true;
00895     }
00896     if( incx == izero ) {
00897       std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00898       BadArgument = true;
00899     }
00900 
00901     if(!BadArgument) {
00902       OrdinalType i, j, ix, jx, kx = izero;
00903       ScalarType temp;
00904       bool noUnit = (EDiagChar[diag] == 'N');
00905 
00906       // Determine if this is a conjugate tranpose
00907       noConj = (ETranspChar[trans] == 'T');
00908 
00909       // Set the starting pointer for the vector x if incx < 0.
00910       if (incx < izero) { kx = (-n+ione)*incx; }
00911 
00912       // Start the operations for a nontransposed triangular matrix
00913       if (ETranspChar[trans] == 'N') {
00914         /* Compute x = A*x */
00915         if (EUploChar[uplo] == 'U') {
00916           /* A is an upper triangular matrix */
00917           if (incx == ione) {
00918             for (j=izero; j<n; j++) {
00919               if (x[j] != zero) {
00920                 temp = x[j];
00921                 for (i=izero; i<j; i++) {
00922                   x[i] += temp*A[j*lda + i];
00923                 }
00924                 if ( noUnit )
00925                   x[j] *= A[j*lda + j];
00926               }
00927             }
00928           } else {
00929             jx = kx;
00930             for (j=izero; j<n; j++) {
00931               if (x[jx] != zero) {
00932                 temp = x[jx];
00933                 ix = kx;
00934                 for (i=izero; i<j; i++) {
00935                   x[ix] += temp*A[j*lda + i];
00936                   ix += incx;
00937                 }
00938                 if ( noUnit )
00939                   x[jx] *= A[j*lda + j];
00940               }
00941               jx += incx;
00942             }
00943           } /* if (incx == ione) */
00944         } else { /* A is a lower triangular matrix */
00945           if (incx == ione) {
00946             for (j=n-ione; j>-ione; j--) {
00947               if (x[j] != zero) {
00948                 temp = x[j];
00949                 for (i=n-ione; i>j; i--) {
00950                   x[i] += temp*A[j*lda + i];
00951                 }
00952                 if ( noUnit )
00953                   x[j] *= A[j*lda + j];
00954               }
00955             }
00956           } else {
00957             kx += (n-ione)*incx;
00958             jx = kx;
00959             for (j=n-ione; j>-ione; j--) {
00960               if (x[jx] != zero) {
00961                 temp = x[jx];
00962                 ix = kx;
00963                 for (i=n-ione; i>j; i--) {
00964                   x[ix] += temp*A[j*lda + i];
00965                   ix -= incx;
00966                 }
00967                 if ( noUnit )
00968                   x[jx] *= A[j*lda + j];
00969               }
00970               jx -= incx;
00971             }
00972           }
00973         } /* if (EUploChar[uplo]=='U') */
00974       } else { /* A is transposed/conjugated */
00975         /* Compute x = A'*x */
00976         if (EUploChar[uplo]=='U') {
00977           /* A is an upper triangular matrix */
00978           if (incx == ione) {
00979             for (j=n-ione; j>-ione; j--) {
00980               temp = x[j];
00981               if ( noConj ) {
00982                 if ( noUnit )
00983                   temp *= A[j*lda + j];
00984                 for (i=j-ione; i>-ione; i--) {
00985                   temp += A[j*lda + i]*x[i];
00986                 }
00987               } else {
00988                 if ( noUnit )
00989                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00990                 for (i=j-ione; i>-ione; i--) {
00991                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00992                 }
00993               }
00994               x[j] = temp;
00995             }
00996           } else {
00997             jx = kx + (n-ione)*incx;
00998             for (j=n-ione; j>-ione; j--) {
00999               temp = x[jx];
01000               ix = jx;
01001               if ( noConj ) {
01002                 if ( noUnit )
01003                   temp *= A[j*lda + j];
01004                 for (i=j-ione; i>-ione; i--) {
01005                   ix -= incx;
01006                   temp += A[j*lda + i]*x[ix];
01007                 }
01008               } else {
01009                 if ( noUnit )
01010                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
01011                 for (i=j-ione; i>-ione; i--) {
01012                   ix -= incx;
01013                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
01014                 }
01015               }
01016               x[jx] = temp;
01017               jx -= incx;
01018             }
01019           }
01020         } else {
01021           /* A is a lower triangular matrix */
01022           if (incx == ione) {
01023             for (j=izero; j<n; j++) {
01024               temp = x[j];
01025               if ( noConj ) {
01026                 if ( noUnit )
01027                   temp *= A[j*lda + j];
01028                 for (i=j+ione; i<n; i++) {
01029                   temp += A[j*lda + i]*x[i];
01030                 }
01031               } else {
01032                 if ( noUnit )
01033                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
01034                 for (i=j+ione; i<n; i++) {
01035                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
01036                 }
01037               }
01038               x[j] = temp;
01039             }
01040           } else {
01041             jx = kx;
01042             for (j=izero; j<n; j++) {
01043               temp = x[jx];
01044               ix = jx;
01045               if ( noConj ) {
01046                 if ( noUnit )
01047                   temp *= A[j*lda + j];
01048                 for (i=j+ione; i<n; i++) {
01049                   ix += incx;
01050                   temp += A[j*lda + i]*x[ix];
01051                 }
01052               } else {
01053                 if ( noUnit )
01054                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
01055                 for (i=j+ione; i<n; i++) {
01056                   ix += incx;
01057                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
01058                 }
01059               }
01060               x[jx] = temp;
01061               jx += incx;
01062             }
01063           }
01064         } /* if (EUploChar[uplo]=='U') */
01065       } /* if (ETranspChar[trans]=='N') */
01066     } /* if (!BadArgument) */
01067   } /* end TRMV */
01068 
01069   template<typename OrdinalType, typename ScalarType>
01070   template <typename alpha_type, typename x_type, typename y_type>
01071   void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
01072   {
01073     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01074     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01075     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01076     y_type y_zero = ScalarTraits<y_type>::zero();
01077     bool BadArgument = false;
01078 
01079     // Quick return if there is nothing to do!
01080     if( m == izero || n == izero || alpha == alpha_zero ){ return; }
01081 
01082     // Otherwise, we need to check the argument list.
01083     if( m < izero ) {
01084         std::cout << "BLAS::GER Error: M == " << m << std::endl;
01085         BadArgument = true;
01086     }
01087     if( n < izero ) {
01088         std::cout << "BLAS::GER Error: N == " << n << std::endl;
01089         BadArgument = true;
01090     }
01091     if( lda < m ) {
01092         std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
01093         BadArgument = true;
01094     }
01095     if( incx == 0 ) {
01096         std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
01097         BadArgument = true;
01098     }
01099     if( incy == 0 ) {
01100         std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
01101         BadArgument = true;
01102     }
01103 
01104     if(!BadArgument) {
01105       OrdinalType i, j, ix, jy = izero, kx = izero;
01106       ScalarType temp;
01107 
01108       // Set the starting pointers for the vectors x and y if incx/y < 0.
01109       if (incx < izero) { kx = (-m+ione)*incx; }
01110       if (incy < izero) { jy = (-n+ione)*incy; }
01111 
01112       // Start the operations for incx == 1
01113       if( incx == ione ) {
01114         for( j=izero; j<n; j++ ) {
01115           if ( y[jy] != y_zero ) {
01116             temp = alpha*y[jy];
01117             for ( i=izero; i<m; i++ ) {
01118               A[j*lda + i] += x[i]*temp;
01119             }
01120           }
01121           jy += incy;
01122         }
01123       }
01124       else { // Start the operations for incx != 1
01125         for( j=izero; j<n; j++ ) {
01126           if ( y[jy] != y_zero ) {
01127             temp = alpha*y[jy];
01128             ix = kx;
01129             for( i=izero; i<m; i++ ) {
01130               A[j*lda + i] += x[ix]*temp;
01131               ix += incx;
01132             }
01133           }
01134           jy += incy;
01135         }
01136       }
01137     } /* if(!BadArgument) */
01138   } /* end GER */
01139 
01140 //------------------------------------------------------------------------------------------
01141 //      LEVEL 3 BLAS ROUTINES
01142 //------------------------------------------------------------------------------------------
01143 
01144   template<typename OrdinalType, typename ScalarType>
01145   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01146   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01147   {
01148 
01149     typedef TypeNameTraits<OrdinalType> OTNT;
01150     typedef TypeNameTraits<ScalarType> STNT;
01151 
01152     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01153     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01154     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01155     B_type B_zero = ScalarTraits<B_type>::zero();
01156     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01157     beta_type beta_one = ScalarTraits<beta_type>::one();
01158     OrdinalType i, j, p;
01159     OrdinalType NRowA = m, NRowB = k;
01160     ScalarType temp;
01161     bool BadArgument = false;
01162     bool conjA = false, conjB = false;
01163 
01164     // Change dimensions of matrix if either matrix is transposed
01165     if( !(ETranspChar[transa]=='N') ) {
01166       NRowA = k;
01167     }
01168     if( !(ETranspChar[transb]=='N') ) {
01169       NRowB = n;
01170     }
01171 
01172     // Quick return if there is nothing to do!
01173     if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
01174     if( m < izero ) {
01175       std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
01176       BadArgument = true;
01177     }
01178     if( n < izero ) {
01179       std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
01180       BadArgument = true;
01181     }
01182     if( k < izero ) {
01183       std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
01184       BadArgument = true;
01185     }
01186     if( lda < NRowA ) {
01187       std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
01188       BadArgument = true;
01189     }
01190     if( ldb < NRowB ) {
01191       std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
01192       BadArgument = true;
01193     }
01194      if( ldc < m ) {
01195       std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
01196       BadArgument = true;
01197     }
01198 
01199     if(!BadArgument) {
01200 
01201       // Determine if this is a conjugate tranpose
01202       conjA = (ETranspChar[transa] == 'C');
01203       conjB = (ETranspChar[transb] == 'C');
01204 
01205       // Only need to scale the resulting matrix C.
01206       if( alpha == alpha_zero ) {
01207         if( beta == beta_zero ) {
01208           for (j=izero; j<n; j++) {
01209             for (i=izero; i<m; i++) {
01210               C[j*ldc + i] = C_zero;
01211             }
01212           }
01213         } else {
01214           for (j=izero; j<n; j++) {
01215             for (i=izero; i<m; i++) {
01216               C[j*ldc + i] *= beta;
01217             }
01218           }
01219         }
01220         return;
01221       }
01222       //
01223       // Now start the operations.
01224       //
01225       if ( ETranspChar[transb]=='N' ) {
01226         if ( ETranspChar[transa]=='N' ) {
01227           // Compute C = alpha*A*B + beta*C
01228           for (j=izero; j<n; j++) {
01229             if( beta == beta_zero ) {
01230               for (i=izero; i<m; i++){
01231                 C[j*ldc + i] = C_zero;
01232               }
01233             } else if( beta != beta_one ) {
01234               for (i=izero; i<m; i++){
01235                 C[j*ldc + i] *= beta;
01236               }
01237             }
01238             for (p=izero; p<k; p++){
01239               if (B[j*ldb + p] != B_zero ){
01240                 temp = alpha*B[j*ldb + p];
01241                 for (i=izero; i<m; i++) {
01242                   C[j*ldc + i] += temp*A[p*lda + i];
01243                 }
01244               }
01245             }
01246           }
01247         } else if ( conjA ) {
01248           // Compute C = alpha*conj(A')*B + beta*C
01249           for (j=izero; j<n; j++) {
01250             for (i=izero; i<m; i++) {
01251               temp = C_zero;
01252               for (p=izero; p<k; p++) {
01253                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p];
01254               }
01255               if (beta == beta_zero) {
01256                 C[j*ldc + i] = alpha*temp;
01257               } else {
01258                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01259               }
01260             }
01261           }
01262         } else {
01263           // Compute C = alpha*A'*B + beta*C
01264           for (j=izero; j<n; j++) {
01265             for (i=izero; i<m; i++) {
01266               temp = C_zero;
01267               for (p=izero; p<k; p++) {
01268                 temp += A[i*lda + p]*B[j*ldb + p];
01269               }
01270               if (beta == beta_zero) {
01271                 C[j*ldc + i] = alpha*temp;
01272               } else {
01273                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01274               }
01275             }
01276           }
01277         }
01278       } else if ( ETranspChar[transa]=='N' ) {
01279         if ( conjB ) {
01280           // Compute C = alpha*A*conj(B') + beta*C
01281           for (j=izero; j<n; j++) {
01282             if (beta == beta_zero) {
01283               for (i=izero; i<m; i++) {
01284                 C[j*ldc + i] = C_zero;
01285               }
01286             } else if ( beta != beta_one ) {
01287               for (i=izero; i<m; i++) {
01288                 C[j*ldc + i] *= beta;
01289               }
01290             }
01291             for (p=izero; p<k; p++) {
01292               if (B[p*ldb + j] != B_zero) {
01293                 temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01294                 for (i=izero; i<m; i++) {
01295                   C[j*ldc + i] += temp*A[p*lda + i];
01296                 }
01297               }
01298             }
01299           }
01300         } else {
01301           // Compute C = alpha*A*B' + beta*C
01302           for (j=izero; j<n; j++) {
01303             if (beta == beta_zero) {
01304               for (i=izero; i<m; i++) {
01305                 C[j*ldc + i] = C_zero;
01306               }
01307             } else if ( beta != beta_one ) {
01308               for (i=izero; i<m; i++) {
01309                 C[j*ldc + i] *= beta;
01310               }
01311             }
01312             for (p=izero; p<k; p++) {
01313               if (B[p*ldb + j] != B_zero) {
01314                 temp = alpha*B[p*ldb + j];
01315                 for (i=izero; i<m; i++) {
01316                   C[j*ldc + i] += temp*A[p*lda + i];
01317                 }
01318               }
01319             }
01320           }
01321         }
01322       } else if ( conjA ) {
01323         if ( conjB ) {
01324           // Compute C = alpha*conj(A')*conj(B') + beta*C
01325           for (j=izero; j<n; j++) {
01326             for (i=izero; i<m; i++) {
01327               temp = C_zero;
01328               for (p=izero; p<k; p++) {
01329                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01330                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01331               }
01332               if (beta == beta_zero) {
01333                 C[j*ldc + i] = alpha*temp;
01334               } else {
01335                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01336               }
01337             }
01338           }
01339         } else {
01340           // Compute C = alpha*conj(A')*B' + beta*C
01341           for (j=izero; j<n; j++) {
01342             for (i=izero; i<m; i++) {
01343               temp = C_zero;
01344               for (p=izero; p<k; p++) {
01345                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01346                       * B[p*ldb + j];
01347               }
01348               if (beta == beta_zero) {
01349                 C[j*ldc + i] = alpha*temp;
01350               } else {
01351                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01352               }
01353             }
01354           }
01355         }
01356      } else {
01357        if ( conjB ) {
01358          // Compute C = alpha*A'*conj(B') + beta*C
01359          for (j=izero; j<n; j++) {
01360             for (i=izero; i<m; i++) {
01361               temp = C_zero;
01362               for (p=izero; p<k; p++) {
01363                 temp += A[i*lda + p]
01364                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01365               }
01366               if (beta == beta_zero) {
01367                 C[j*ldc + i] = alpha*temp;
01368               } else {
01369                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01370               }
01371             }
01372           }
01373         } else {
01374           // Compute C = alpha*A'*B' + beta*C
01375           for (j=izero; j<n; j++) {
01376             for (i=izero; i<m; i++) {
01377               temp = C_zero;
01378               for (p=izero; p<k; p++) {
01379                 temp += A[i*lda + p]*B[p*ldb + j];
01380               }
01381               if (beta == beta_zero) {
01382                 C[j*ldc + i] = alpha*temp;
01383               } else {
01384                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01385               }
01386             }
01387           }
01388         } // end if (ETranspChar[transa]=='N') ...
01389       } // end if (ETranspChar[transb]=='N') ...
01390     } // end if (!BadArgument) ...
01391   } // end of GEMM
01392 
01393 
01394   template<typename OrdinalType, typename ScalarType>
01395   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01396   void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01397   {
01398     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01399     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01400     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01401     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01402     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01403     beta_type beta_one = ScalarTraits<beta_type>::one();
01404     OrdinalType i, j, k, NRowA = m;
01405     ScalarType temp1, temp2;
01406     bool BadArgument = false;
01407     bool Upper = (EUploChar[uplo] == 'U');
01408     if (ESideChar[side] == 'R') { NRowA = n; }
01409 
01410     // Quick return.
01411     if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
01412     if( m < izero ) {
01413       std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
01414       BadArgument = true; }
01415     if( n < izero ) {
01416       std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
01417       BadArgument = true; }
01418     if( lda < NRowA ) {
01419       std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
01420       BadArgument = true; }
01421     if( ldb < m ) {
01422       std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
01423       BadArgument = true; }
01424     if( ldc < m ) {
01425       std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
01426       BadArgument = true; }
01427 
01428     if(!BadArgument) {
01429 
01430       // Only need to scale C and return.
01431       if (alpha == alpha_zero) {
01432         if (beta == beta_zero ) {
01433           for (j=izero; j<n; j++) {
01434             for (i=izero; i<m; i++) {
01435               C[j*ldc + i] = C_zero;
01436             }
01437           }
01438         } else {
01439           for (j=izero; j<n; j++) {
01440             for (i=izero; i<m; i++) {
01441               C[j*ldc + i] *= beta;
01442             }
01443           }
01444         }
01445         return;
01446       }
01447 
01448       if ( ESideChar[side] == 'L') {
01449         // Compute C = alpha*A*B + beta*C
01450 
01451         if (Upper) {
01452           // The symmetric part of A is stored in the upper triangular part of the matrix.
01453           for (j=izero; j<n; j++) {
01454             for (i=izero; i<m; i++) {
01455               temp1 = alpha*B[j*ldb + i];
01456               temp2 = C_zero;
01457               for (k=izero; k<i; k++) {
01458                 C[j*ldc + k] += temp1*A[i*lda + k];
01459                 temp2 += B[j*ldb + k]*A[i*lda + k];
01460               }
01461               if (beta == beta_zero) {
01462                 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01463               } else {
01464                 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01465               }
01466             }
01467           }
01468         } else {
01469           // The symmetric part of A is stored in the lower triangular part of the matrix.
01470           for (j=izero; j<n; j++) {
01471             for (i=m-ione; i>-ione; i--) {
01472               temp1 = alpha*B[j*ldb + i];
01473               temp2 = C_zero;
01474               for (k=i+ione; k<m; k++) {
01475                 C[j*ldc + k] += temp1*A[i*lda + k];
01476                 temp2 += B[j*ldb + k]*A[i*lda + k];
01477               }
01478               if (beta == beta_zero) {
01479                 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01480               } else {
01481                 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01482               }
01483             }
01484           }
01485         }
01486       } else {
01487         // Compute C = alpha*B*A + beta*C.
01488         for (j=izero; j<n; j++) {
01489           temp1 = alpha*A[j*lda + j];
01490           if (beta == beta_zero) {
01491             for (i=izero; i<m; i++) {
01492               C[j*ldc + i] = temp1*B[j*ldb + i];
01493             }
01494           } else {
01495             for (i=izero; i<m; i++) {
01496               C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01497             }
01498           }
01499           for (k=izero; k<j; k++) {
01500             if (Upper) {
01501               temp1 = alpha*A[j*lda + k];
01502             } else {
01503               temp1 = alpha*A[k*lda + j];
01504             }
01505             for (i=izero; i<m; i++) {
01506               C[j*ldc + i] += temp1*B[k*ldb + i];
01507             }
01508           }
01509           for (k=j+ione; k<n; k++) {
01510             if (Upper) {
01511               temp1 = alpha*A[k*lda + j];
01512             } else {
01513               temp1 = alpha*A[j*lda + k];
01514             }
01515             for (i=izero; i<m; i++) {
01516               C[j*ldc + i] += temp1*B[k*ldb + i];
01517             }
01518           }
01519         }
01520       } // end if (ESideChar[side]=='L') ...
01521     } // end if(!BadArgument) ...
01522 } // end SYMM
01523 
01524   template<typename OrdinalType, typename ScalarType>
01525   template <typename alpha_type, typename A_type, typename beta_type>
01526   void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01527   {
01528     typedef TypeNameTraits<OrdinalType> OTNT;
01529     typedef TypeNameTraits<ScalarType> STNT;
01530 
01531     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01532     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01533     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01534     beta_type beta_one = ScalarTraits<beta_type>::one();
01535     A_type temp, A_zero = ScalarTraits<A_type>::zero();
01536     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01537     OrdinalType i, j, l, NRowA = n;
01538     bool BadArgument = false;
01539     bool Upper = (EUploChar[uplo] == 'U');
01540 
01541     TEUCHOS_TEST_FOR_EXCEPTION(
01542       Teuchos::ScalarTraits<ScalarType>::isComplex
01543       && (trans == CONJ_TRANS),
01544       std::logic_error,
01545             "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
01546             " does not support CONJ_TRANS for complex data types."
01547       );
01548 
01549     // Change dimensions of A matrix is transposed
01550     if( !(ETranspChar[trans]=='N') ) {
01551       NRowA = k;
01552     }
01553 
01554     // Quick return.
01555     if ( n==izero ) { return; }
01556     if ( ( (alpha==alpha_zero) || (k==izero) )  && (beta==beta_one) ) { return; }
01557     if( n < izero ) {
01558       std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
01559       BadArgument = true; }
01560     if( k < izero ) {
01561       std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
01562       BadArgument = true; }
01563     if( lda < NRowA ) {
01564       std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
01565       BadArgument = true; }
01566     if( ldc < n ) {
01567       std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
01568       BadArgument = true; }
01569 
01570     if(!BadArgument) {
01571 
01572       // Scale C when alpha is zero
01573       if (alpha == alpha_zero) {
01574         if (Upper) {
01575           if (beta==beta_zero) {
01576             for (j=izero; j<n; j++) {
01577               for (i=izero; i<=j; i++) {
01578                 C[j*ldc + i] = C_zero;
01579               }
01580             }
01581           }
01582           else {
01583             for (j=izero; j<n; j++) {
01584               for (i=izero; i<=j; i++) {
01585                 C[j*ldc + i] *= beta;
01586               }
01587             }
01588           }
01589         }
01590         else {
01591           if (beta==beta_zero) {
01592             for (j=izero; j<n; j++) {
01593               for (i=j; i<n; i++) {
01594                 C[j*ldc + i] = C_zero;
01595               }
01596             }
01597           }
01598           else {
01599             for (j=izero; j<n; j++) {
01600               for (i=j; i<n; i++) {
01601                 C[j*ldc + i] *= beta;
01602               }
01603             }
01604           }
01605         }
01606         return;
01607       }
01608 
01609       // Now we can start the computation
01610 
01611       if ( ETranspChar[trans]=='N' ) {
01612 
01613         // Form C <- alpha*A*A' + beta*C
01614         if (Upper) {
01615           for (j=izero; j<n; j++) {
01616             if (beta==beta_zero) {
01617               for (i=izero; i<=j; i++) {
01618                 C[j*ldc + i] = C_zero;
01619               }
01620             }
01621             else if (beta!=beta_one) {
01622               for (i=izero; i<=j; i++) {
01623                 C[j*ldc + i] *= beta;
01624               }
01625             }
01626             for (l=izero; l<k; l++) {
01627               if (A[l*lda + j] != A_zero) {
01628                 temp = alpha*A[l*lda + j];
01629                 for (i = izero; i <=j; i++) {
01630                   C[j*ldc + i] += temp*A[l*lda + i];
01631                 }
01632               }
01633             }
01634           }
01635         }
01636         else {
01637           for (j=izero; j<n; j++) {
01638             if (beta==beta_zero) {
01639               for (i=j; i<n; i++) {
01640                 C[j*ldc + i] = C_zero;
01641               }
01642             }
01643             else if (beta!=beta_one) {
01644               for (i=j; i<n; i++) {
01645                 C[j*ldc + i] *= beta;
01646               }
01647             }
01648             for (l=izero; l<k; l++) {
01649               if (A[l*lda + j] != A_zero) {
01650                 temp = alpha*A[l*lda + j];
01651                 for (i=j; i<n; i++) {
01652                   C[j*ldc + i] += temp*A[l*lda + i];
01653                 }
01654               }
01655             }
01656           }
01657         }
01658       }
01659       else {
01660 
01661         // Form C <- alpha*A'*A + beta*C
01662         if (Upper) {
01663           for (j=izero; j<n; j++) {
01664             for (i=izero; i<=j; i++) {
01665               temp = A_zero;
01666               for (l=izero; l<k; l++) {
01667                 temp += A[i*lda + l]*A[j*lda + l];
01668               }
01669               if (beta==beta_zero) {
01670                 C[j*ldc + i] = alpha*temp;
01671               }
01672               else {
01673                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01674               }
01675             }
01676           }
01677         }
01678         else {
01679           for (j=izero; j<n; j++) {
01680             for (i=j; i<n; i++) {
01681               temp = A_zero;
01682               for (l=izero; l<k; ++l) {
01683                 temp += A[i*lda + l]*A[j*lda + l];
01684               }
01685               if (beta==beta_zero) {
01686                 C[j*ldc + i] = alpha*temp;
01687               }
01688               else {
01689                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01690               }
01691             }
01692           }
01693         }
01694       }
01695     } /* if (!BadArgument) */
01696   } /* END SYRK */
01697 
01698   template<typename OrdinalType, typename ScalarType>
01699   template <typename alpha_type, typename A_type>
01700   void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01701   {
01702     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01703     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01704     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01705     A_type A_zero = ScalarTraits<A_type>::zero();
01706     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01707     ScalarType one = ScalarTraits<ScalarType>::one();
01708     OrdinalType i, j, k, NRowA = m;
01709     ScalarType temp;
01710     bool BadArgument = false;
01711     bool LSide = (ESideChar[side] == 'L');
01712     bool noUnit = (EDiagChar[diag] == 'N');
01713     bool Upper = (EUploChar[uplo] == 'U');
01714     bool noConj = (ETranspChar[transa] == 'T');
01715 
01716     if(!LSide) { NRowA = n; }
01717 
01718     // Quick return.
01719     if (n==izero || m==izero) { return; }
01720     if( m < izero ) {
01721       std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01722       BadArgument = true; }
01723     if( n < izero ) {
01724       std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01725       BadArgument = true; }
01726     if( lda < NRowA ) {
01727       std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
01728       BadArgument = true; }
01729     if( ldb < m ) {
01730       std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
01731       BadArgument = true; }
01732 
01733     if(!BadArgument) {
01734 
01735       // B only needs to be zeroed out.
01736       if( alpha == alpha_zero ) {
01737         for( j=izero; j<n; j++ ) {
01738           for( i=izero; i<m; i++ ) {
01739             B[j*ldb + i] = B_zero;
01740           }
01741         }
01742         return;
01743       }
01744 
01745       //  Start the computations.
01746       if ( LSide ) {
01747         // A is on the left side of B.
01748 
01749         if ( ETranspChar[transa]=='N' ) {
01750           // Compute B = alpha*A*B
01751 
01752           if ( Upper ) {
01753             // A is upper triangular
01754             for( j=izero; j<n; j++ ) {
01755               for( k=izero; k<m; k++) {
01756                 if ( B[j*ldb + k] != B_zero ) {
01757                   temp = alpha*B[j*ldb + k];
01758                   for( i=izero; i<k; i++ ) {
01759                     B[j*ldb + i] += temp*A[k*lda + i];
01760                   }
01761                   if ( noUnit )
01762                     temp *=A[k*lda + k];
01763                   B[j*ldb + k] = temp;
01764                 }
01765               }
01766             }
01767           } else {
01768             // A is lower triangular
01769             for( j=izero; j<n; j++ ) {
01770               for( k=m-ione; k>-ione; k-- ) {
01771                 if( B[j*ldb + k] != B_zero ) {
01772                   temp = alpha*B[j*ldb + k];
01773                   B[j*ldb + k] = temp;
01774                   if ( noUnit )
01775                     B[j*ldb + k] *= A[k*lda + k];
01776                   for( i=k+ione; i<m; i++ ) {
01777                     B[j*ldb + i] += temp*A[k*lda + i];
01778                   }
01779                 }
01780               }
01781             }
01782           }
01783         } else {
01784           // Compute B = alpha*A'*B or B = alpha*conj(A')*B
01785           if( Upper ) {
01786             for( j=izero; j<n; j++ ) {
01787               for( i=m-ione; i>-ione; i-- ) {
01788                 temp = B[j*ldb + i];
01789                 if ( noConj ) {
01790                   if( noUnit )
01791                     temp *= A[i*lda + i];
01792                   for( k=izero; k<i; k++ ) {
01793                     temp += A[i*lda + k]*B[j*ldb + k];
01794                   }
01795                 } else {
01796                   if( noUnit )
01797                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01798                   for( k=izero; k<i; k++ ) {
01799                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01800                   }
01801                 }
01802                 B[j*ldb + i] = alpha*temp;
01803               }
01804             }
01805           } else {
01806             for( j=izero; j<n; j++ ) {
01807               for( i=izero; i<m; i++ ) {
01808                 temp = B[j*ldb + i];
01809                 if ( noConj ) {
01810                   if( noUnit )
01811                     temp *= A[i*lda + i];
01812                   for( k=i+ione; k<m; k++ ) {
01813                     temp += A[i*lda + k]*B[j*ldb + k];
01814                   }
01815                 } else {
01816                   if( noUnit )
01817                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01818                   for( k=i+ione; k<m; k++ ) {
01819                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01820                   }
01821                 }
01822                 B[j*ldb + i] = alpha*temp;
01823               }
01824             }
01825           }
01826         }
01827       } else {
01828         // A is on the right hand side of B.
01829 
01830         if( ETranspChar[transa] == 'N' ) {
01831           // Compute B = alpha*B*A
01832 
01833           if( Upper ) {
01834             // A is upper triangular.
01835             for( j=n-ione; j>-ione; j-- ) {
01836               temp = alpha;
01837               if( noUnit )
01838                 temp *= A[j*lda + j];
01839               for( i=izero; i<m; i++ ) {
01840                 B[j*ldb + i] *= temp;
01841               }
01842               for( k=izero; k<j; k++ ) {
01843                 if( A[j*lda + k] != A_zero ) {
01844                   temp = alpha*A[j*lda + k];
01845                   for( i=izero; i<m; i++ ) {
01846                     B[j*ldb + i] += temp*B[k*ldb + i];
01847                   }
01848                 }
01849               }
01850             }
01851           } else {
01852             // A is lower triangular.
01853             for( j=izero; j<n; j++ ) {
01854               temp = alpha;
01855               if( noUnit )
01856                 temp *= A[j*lda + j];
01857               for( i=izero; i<m; i++ ) {
01858                 B[j*ldb + i] *= temp;
01859               }
01860               for( k=j+ione; k<n; k++ ) {
01861                 if( A[j*lda + k] != A_zero ) {
01862                   temp = alpha*A[j*lda + k];
01863                   for( i=izero; i<m; i++ ) {
01864                     B[j*ldb + i] += temp*B[k*ldb + i];
01865                   }
01866                 }
01867               }
01868             }
01869           }
01870         } else {
01871           // Compute B = alpha*B*A' or B = alpha*B*conj(A')
01872 
01873           if( Upper ) {
01874             for( k=izero; k<n; k++ ) {
01875               for( j=izero; j<k; j++ ) {
01876                 if( A[k*lda + j] != A_zero ) {
01877                   if ( noConj )
01878                     temp = alpha*A[k*lda + j];
01879                   else
01880                     temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01881                   for( i=izero; i<m; i++ ) {
01882                     B[j*ldb + i] += temp*B[k*ldb + i];
01883                   }
01884                 }
01885               }
01886               temp = alpha;
01887               if( noUnit ) {
01888                 if ( noConj )
01889                   temp *= A[k*lda + k];
01890                 else
01891                   temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01892               }
01893               if( temp != one ) {
01894                 for( i=izero; i<m; i++ ) {
01895                   B[k*ldb + i] *= temp;
01896                 }
01897               }
01898             }
01899           } else {
01900             for( k=n-ione; k>-ione; k-- ) {
01901               for( j=k+ione; j<n; j++ ) {
01902                 if( A[k*lda + j] != A_zero ) {
01903                   if ( noConj )
01904                     temp = alpha*A[k*lda + j];
01905                   else
01906                     temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01907                   for( i=izero; i<m; i++ ) {
01908                     B[j*ldb + i] += temp*B[k*ldb + i];
01909                   }
01910                 }
01911               }
01912               temp = alpha;
01913               if( noUnit ) {
01914                 if ( noConj )
01915                   temp *= A[k*lda + k];
01916                 else
01917                   temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01918               }
01919               if( temp != one ) {
01920                 for( i=izero; i<m; i++ ) {
01921                   B[k*ldb + i] *= temp;
01922                 }
01923               }
01924             }
01925           }
01926         } // end if( ETranspChar[transa] == 'N' ) ...
01927       } // end if ( LSide ) ...
01928     } // end if (!BadArgument)
01929   } // end TRMM
01930 
01931   template<typename OrdinalType, typename ScalarType>
01932   template <typename alpha_type, typename A_type>
01933   void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01934   {
01935     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01936     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01937     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01938     A_type A_zero = ScalarTraits<A_type>::zero();
01939     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01940     alpha_type alpha_one = ScalarTraits<alpha_type>::one();
01941     ScalarType B_one = ScalarTraits<ScalarType>::one();
01942     ScalarType temp;
01943     OrdinalType NRowA = m;
01944     bool BadArgument = false;
01945     bool noUnit = (EDiagChar[diag]=='N');
01946     bool noConj = (ETranspChar[transa] == 'T');
01947 
01948     if (!(ESideChar[side] == 'L')) { NRowA = n; }
01949 
01950     // Quick return.
01951     if (n == izero || m == izero) { return; }
01952     if( m < izero ) {
01953       std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01954       BadArgument = true; }
01955     if( n < izero ) {
01956       std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01957       BadArgument = true; }
01958     if( lda < NRowA ) {
01959       std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
01960       BadArgument = true; }
01961     if( ldb < m ) {
01962       std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
01963       BadArgument = true; }
01964 
01965     if(!BadArgument)
01966       {
01967         int i, j, k;
01968         // Set the solution to the zero vector.
01969         if(alpha == alpha_zero) {
01970             for(j = izero; j < n; j++) {
01971                 for( i = izero; i < m; i++) {
01972                     B[j*ldb+i] = B_zero;
01973                 }
01974             }
01975         }
01976         else
01977         { // Start the operations.
01978             if(ESideChar[side] == 'L') {
01979                 //
01980                 // Perform computations for OP(A)*X = alpha*B
01981                 //
01982                 if(ETranspChar[transa] == 'N') {
01983                     //
01984                     //  Compute B = alpha*inv( A )*B
01985                     //
01986                     if(EUploChar[uplo] == 'U') {
01987                         // A is upper triangular.
01988                         for(j = izero; j < n; j++) {
01989                             // Perform alpha*B if alpha is not 1.
01990                           if(alpha != alpha_one) {
01991                                 for( i = izero; i < m; i++) {
01992                                     B[j*ldb+i] *= alpha;
01993                                 }
01994                             }
01995                             // Perform a backsolve for column j of B.
01996                             for(k = (m - ione); k > -ione; k--) {
01997                                 // If this entry is zero, we don't have to do anything.
01998                                 if (B[j*ldb + k] != B_zero) {
01999                                     if ( noUnit ) {
02000                                         B[j*ldb + k] /= A[k*lda + k];
02001                                     }
02002                                     for(i = izero; i < k; i++) {
02003                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
02004                                     }
02005                                 }
02006                             }
02007                         }
02008                     }
02009                     else
02010                     { // A is lower triangular.
02011                         for(j = izero; j < n; j++) {
02012                             // Perform alpha*B if alpha is not 1.
02013                             if(alpha != alpha_one) {
02014                                 for( i = izero; i < m; i++) {
02015                                     B[j*ldb+i] *= alpha;
02016                                 }
02017                             }
02018                             // Perform a forward solve for column j of B.
02019                             for(k = izero; k < m; k++) {
02020                                 // If this entry is zero, we don't have to do anything.
02021                                 if (B[j*ldb + k] != B_zero) {
02022                                     if ( noUnit ) {
02023                                         B[j*ldb + k] /= A[k*lda + k];
02024                                     }
02025                                     for(i = k+ione; i < m; i++) {
02026                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
02027                                     }
02028                                 }
02029                             }
02030                         }
02031                     } // end if (uplo == 'U')
02032                 }  // if (transa =='N')
02033                 else {
02034                     //
02035                     //  Compute B = alpha*inv( A' )*B
02036                     //  or      B = alpha*inv( conj(A') )*B
02037                     //
02038                     if(EUploChar[uplo] == 'U') {
02039                         // A is upper triangular.
02040                         for(j = izero; j < n; j++) {
02041                             for( i = izero; i < m; i++) {
02042                                 temp = alpha*B[j*ldb+i];
02043                                 if ( noConj ) {
02044                                   for(k = izero; k < i; k++) {
02045                                       temp -= A[i*lda + k] * B[j*ldb + k];
02046                                   }
02047                                   if ( noUnit ) {
02048                                       temp /= A[i*lda + i];
02049                                   }
02050                                 } else {
02051                                   for(k = izero; k < i; k++) {
02052                                       temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
02053                                             * B[j*ldb + k];
02054                                   }
02055                                   if ( noUnit ) {
02056                                       temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
02057                                   }
02058                                 }
02059                                 B[j*ldb + i] = temp;
02060                             }
02061                         }
02062                     }
02063                     else
02064                     { // A is lower triangular.
02065                         for(j = izero; j < n; j++) {
02066                             for(i = (m - ione) ; i > -ione; i--) {
02067                                 temp = alpha*B[j*ldb+i];
02068                                 if ( noConj ) {
02069                                   for(k = i+ione; k < m; k++) {
02070                                     temp -= A[i*lda + k] * B[j*ldb + k];
02071                                   }
02072                                   if ( noUnit ) {
02073                                     temp /= A[i*lda + i];
02074                                   }
02075                                 } else {
02076                                   for(k = i+ione; k < m; k++) {
02077                                     temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
02078                                           * B[j*ldb + k];
02079                                   }
02080                                   if ( noUnit ) {
02081                                     temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
02082                                   }
02083                                 }
02084                                 B[j*ldb + i] = temp;
02085                             }
02086                         }
02087                     }
02088                 }
02089             }  // if (side == 'L')
02090             else {
02091                // side == 'R'
02092                //
02093                // Perform computations for X*OP(A) = alpha*B
02094                //
02095               if (ETranspChar[transa] == 'N') {
02096                     //
02097                     //  Compute B = alpha*B*inv( A )
02098                     //
02099                     if(EUploChar[uplo] == 'U') {
02100                         // A is upper triangular.
02101                         // Perform a backsolve for column j of B.
02102                         for(j = izero; j < n; j++) {
02103                             // Perform alpha*B if alpha is not 1.
02104                             if(alpha != alpha_one) {
02105                                 for( i = izero; i < m; i++) {
02106                                     B[j*ldb+i] *= alpha;
02107                                 }
02108                             }
02109                             for(k = izero; k < j; k++) {
02110                                 // If this entry is zero, we don't have to do anything.
02111                                 if (A[j*lda + k] != A_zero) {
02112                                     for(i = izero; i < m; i++) {
02113                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
02114                                     }
02115                                 }
02116                             }
02117                             if ( noUnit ) {
02118                                 temp = B_one/A[j*lda + j];
02119                                 for(i = izero; i < m; i++) {
02120                                     B[j*ldb + i] *= temp;
02121                                 }
02122                             }
02123                         }
02124                     }
02125                     else
02126                     { // A is lower triangular.
02127                         for(j = (n - ione); j > -ione; j--) {
02128                             // Perform alpha*B if alpha is not 1.
02129                             if(alpha != alpha_one) {
02130                                 for( i = izero; i < m; i++) {
02131                                     B[j*ldb+i] *= alpha;
02132                                 }
02133                             }
02134                             // Perform a forward solve for column j of B.
02135                             for(k = j+ione; k < n; k++) {
02136                                 // If this entry is zero, we don't have to do anything.
02137                                 if (A[j*lda + k] != A_zero) {
02138                                     for(i = izero; i < m; i++) {
02139                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
02140                                     }
02141                                 }
02142                             }
02143                             if ( noUnit ) {
02144                                 temp = B_one/A[j*lda + j];
02145                                 for(i = izero; i < m; i++) {
02146                                     B[j*ldb + i] *= temp;
02147                                 }
02148                             }
02149                         }
02150                     } // end if (uplo == 'U')
02151                 }  // if (transa =='N')
02152                 else {
02153                     //
02154                     //  Compute B = alpha*B*inv( A' )
02155                     //  or      B = alpha*B*inv( conj(A') )
02156                     //
02157                     if(EUploChar[uplo] == 'U') {
02158                         // A is upper triangular.
02159                         for(k = (n - ione); k > -ione; k--) {
02160                             if ( noUnit ) {
02161                                 if ( noConj )
02162                                   temp = B_one/A[k*lda + k];
02163                                 else
02164                                   temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
02165                                 for(i = izero; i < m; i++) {
02166                                     B[k*ldb + i] *= temp;
02167                                 }
02168                             }
02169                             for(j = izero; j < k; j++) {
02170                                 if (A[k*lda + j] != A_zero) {
02171                                     if ( noConj )
02172                                       temp = A[k*lda + j];
02173                                     else
02174                                       temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
02175                                     for(i = izero; i < m; i++) {
02176                                         B[j*ldb + i] -= temp*B[k*ldb + i];
02177                                     }
02178                                 }
02179                             }
02180                             if (alpha != alpha_one) {
02181                                 for (i = izero; i < m; i++) {
02182                                     B[k*ldb + i] *= alpha;
02183                                 }
02184                             }
02185                         }
02186                     }
02187                     else
02188                     { // A is lower triangular.
02189                         for(k = izero; k < n; k++) {
02190                             if ( noUnit ) {
02191                                 if ( noConj )
02192                                   temp = B_one/A[k*lda + k];
02193                                 else
02194                                   temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
02195                                 for (i = izero; i < m; i++) {
02196                                     B[k*ldb + i] *= temp;
02197                                 }
02198                             }
02199                             for(j = k+ione; j < n; j++) {
02200                                 if(A[k*lda + j] != A_zero) {
02201                                     if ( noConj )
02202                                       temp = A[k*lda + j];
02203                                     else
02204                                       temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
02205                                     for(i = izero; i < m; i++) {
02206                                         B[j*ldb + i] -= temp*B[k*ldb + i];
02207                                     }
02208                                 }
02209                             }
02210                             if (alpha != alpha_one) {
02211                                 for (i = izero; i < m; i++) {
02212                                     B[k*ldb + i] *= alpha;
02213                                 }
02214                             }
02215                         }
02216                     }
02217                 }
02218             }
02219         }
02220     }
02221   }
02222 
02223   // Explicit instantiation for template<int,float>
02224 
02225   template <>
02226   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, float>
02227   {
02228   public:
02229     inline BLAS(void) {}
02230     inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
02231     inline virtual ~BLAS(void) {}
02232     void ROTG(float* da, float* db, float* c, float* s) const;
02233     void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
02234     float ASUM(const int n, const float* x, const int incx) const;
02235     void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
02236     void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
02237     float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
02238     float NRM2(const int n, const float* x, const int incx) const;
02239     void SCAL(const int n, const float alpha, float* x, const int incx) const;
02240     int IAMAX(const int n, const float* x, const int incx) const;
02241     void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
02242     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
02243     void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
02244     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
02245     void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
02246     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const;
02247     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02248     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02249   };
02250 
02251   // Explicit instantiation for template<int,double>
02252 
02253   template<>
02254   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, double>
02255   {
02256   public:
02257     inline BLAS(void) {}
02258     inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
02259     inline virtual ~BLAS(void) {}
02260     void ROTG(double* da, double* db, double* c, double* s) const;
02261     void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
02262     double ASUM(const int n, const double* x, const int incx) const;
02263     void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
02264     void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
02265     double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
02266     double NRM2(const int n, const double* x, const int incx) const;
02267     void SCAL(const int n, const double alpha, double* x, const int incx) const;
02268     int IAMAX(const int n, const double* x, const int incx) const;
02269     void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
02270     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
02271     void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
02272     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
02273     void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
02274     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const;
02275     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02276     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02277   };
02278 
02279   // Explicit instantiation for template<int,complex<float> >
02280 
02281   template<>
02282   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
02283   {
02284   public:
02285     inline BLAS(void) {}
02286     inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
02287     inline virtual ~BLAS(void) {}
02288     void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
02289     void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
02290     float ASUM(const int n, const std::complex<float>* x, const int incx) const;
02291     void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02292     void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02293     std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
02294     float NRM2(const int n, const std::complex<float>* x, const int incx) const;
02295     void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
02296     int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
02297     void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
02298     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
02299     void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
02300     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02301     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
02302     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02303     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02304     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02305   };
02306 
02307   // Explicit instantiation for template<int,complex<double> >
02308 
02309   template<>
02310   class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
02311   {
02312   public:
02313     inline BLAS(void) {}
02314     inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
02315     inline virtual ~BLAS(void) {}
02316     void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
02317     void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
02318     double ASUM(const int n, const std::complex<double>* x, const int incx) const;
02319     void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02320     void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02321     std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
02322     double NRM2(const int n, const std::complex<double>* x, const int incx) const;
02323     void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
02324     int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
02325     void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
02326     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
02327     void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
02328     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02329     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
02330     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02331     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02332     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02333   };
02334 
02335 } // namespace Teuchos
02336 
02337 #endif // _TEUCHOS_BLAS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines