Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Teuchos_BLAS.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 // Kris
00043 // 06.16.03 -- Start over from scratch
00044 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
00045 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
00046 //          -- Added warning messages for generic calls
00047 // 07.08.03 -- Move into Teuchos package/namespace
00048 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
00049 //             * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
00050 //             * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
00051 //             * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
00052 //             * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
00053 //             * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
00054 //          -- Removed warning messages for generic calls
00055 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
00056 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
00057 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
00058 
00059 #ifndef _TEUCHOS_BLAS_HPP_
00060 #define _TEUCHOS_BLAS_HPP_
00061 
00069 #include "Teuchos_ConfigDefs.hpp"
00070 #include "Teuchos_ScalarTraits.hpp"
00071 #include "Teuchos_OrdinalTraits.hpp"
00072 #include "Teuchos_BLAS_types.hpp"
00073 #include "Teuchos_Assert.hpp"
00074 
00107 namespace Teuchos
00108 {
00109   extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ESideChar[];
00110   extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETranspChar[];
00111   extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EUploChar[];
00112   extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char EDiagChar[];
00113   extern TEUCHOSNUMERICS_LIB_DLL_EXPORT const char ETypeChar[];
00114 
00116 
00121   template<typename OrdinalType, typename ScalarType>
00122   class DefaultBLASImpl
00123   {
00124 
00125     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00126 
00127   public:
00129 
00130 
00132     inline DefaultBLASImpl(void) {}
00133 
00135     inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00136 
00138     inline virtual ~DefaultBLASImpl(void) {}
00140 
00142 
00143 
00145     void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00146 
00148     void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00149 
00151     void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00152 
00154     void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00155 
00157     template <typename alpha_type, typename x_type>
00158     void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00159 
00161     typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00162 
00164     template <typename x_type, typename y_type>
00165     ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
00166 
00168     typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00169 
00171     OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00173 
00175 
00176 
00178     template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00179     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
00180               const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
00181 
00183     template <typename A_type>
00184     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
00185               const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00186 
00189     template <typename alpha_type, typename x_type, typename y_type>
00190     void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
00191              const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00193 
00195 
00196 
00203     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00204     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00205 
00207     template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00208     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00209 
00211     template <typename alpha_type, typename A_type, typename beta_type>
00212     void SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00213 
00215     template <typename alpha_type, typename A_type>
00216     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00217                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00218 
00220     template <typename alpha_type, typename A_type>
00221     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00222                 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00224   };
00225 
00226   template<typename OrdinalType, typename ScalarType>
00227   class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
00228   {
00229 
00230     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00231 
00232   public:
00234 
00235 
00237     inline BLAS(void) {}
00238 
00240     inline BLAS(const BLAS<OrdinalType, ScalarType>& /*BLAS_source*/) {}
00241 
00243     inline virtual ~BLAS(void) {}
00245   };
00246 
00247 //------------------------------------------------------------------------------------------
00248 //      LEVEL 1 BLAS ROUTINES
00249 //------------------------------------------------------------------------------------------
00250 
00258   namespace details {
00259 
00260     // Compute magnitude.
00261     template<typename ScalarType, bool isComplex>
00262     class MagValue {
00263     public:
00264       void
00265       blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 
00266     };
00267 
00268     // Complex-arithmetic specialization. 
00269     template<typename ScalarType>
00270     class MagValue<ScalarType, true> {
00271     public:
00272       void
00273       blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const; 
00274     };
00275 
00276     // Real-arithmetic specialization.
00277     template<typename ScalarType>
00278     class MagValue<ScalarType, false> {
00279     public:
00280       void
00281       blas_dabs1(const ScalarType* a, ScalarType* ret) const;
00282     };
00283  
00284     template<typename ScalarType, bool isComplex>
00285     class GivensRotator {
00286     public:
00287       void
00288       ROTG (ScalarType* a,
00289             ScalarType* b,
00290             typename ScalarTraits<ScalarType>::magnitudeType* c,
00291             ScalarType* s) const;
00292     };
00293 
00294     // Complex-arithmetic specialization.
00295     template<typename ScalarType>
00296     class GivensRotator<ScalarType, true> {
00297     public:
00298       void
00299       ROTG (ScalarType* ca,
00300             ScalarType* cb,
00301             typename ScalarTraits<ScalarType>::magnitudeType* c,
00302             ScalarType* s) const;
00303     };
00304 
00305     // Real-arithmetic specialization.
00306     template<typename ScalarType>
00307     class GivensRotator<ScalarType, false> {
00308     public:
00309       void
00310       ROTG (ScalarType* da,
00311             ScalarType* db,
00312             ScalarType* c,
00313             ScalarType* s) const;
00314     private:
00327       ScalarType SIGN (ScalarType x, ScalarType y) const {
00328         typedef ScalarTraits<ScalarType> STS;
00329 
00330         if (y > STS::zero()) {
00331           return STS::magnitude (x);
00332         } else if (y < STS::zero()) {
00333           return -STS::magnitude (x);
00334         } else { // y == STS::zero()
00335           // Suppose that ScalarType implements signed zero, as IEEE
00336           // 754 - compliant floating-point numbers should.  You can't
00337           // use == to test for signed zero, since +0 == -0.  However,
00338           // 1/0 = Inf > 0 and 1/-0 = -Inf < 0.  Let's hope ScalarType
00339           // supports Inf... we don't need to test for Inf, just see
00340           // if it's greater than or less than zero.
00341           //
00342           // NOTE: This ONLY works if ScalarType is real.  Complex
00343           // infinity doesn't have a sign, so we can't compare it with
00344           // zero.  That's OK, because finite complex numbers don't
00345           // have a sign either; they have an angle.
00346           ScalarType signedInfinity = STS::one() / y;
00347           if (signedInfinity > STS::zero()) {
00348             return STS::magnitude (x);
00349           } else {
00350             // Even if ScalarType doesn't implement signed zero,
00351             // Fortran's SIGN intrinsic returns -ABS(X) if the second
00352             // argument Y is zero.  We imitate this behavior here.
00353             return -STS::magnitude (x);
00354           }
00355         }
00356       }
00357     };
00358 
00359     // Implementation of complex-arithmetic specialization.
00360     template<typename ScalarType>
00361     void
00362     GivensRotator<ScalarType, true>::
00363     ROTG (ScalarType* ca,
00364           ScalarType* cb,
00365           typename ScalarTraits<ScalarType>::magnitudeType* c,
00366           ScalarType* s) const
00367     {
00368       typedef ScalarTraits<ScalarType> STS;
00369       typedef typename STS::magnitudeType MagnitudeType;
00370       typedef ScalarTraits<MagnitudeType> STM;
00371 
00372       // This is a straightforward translation into C++ of the
00373       // reference BLAS' implementation of ZROTG.  You can get
00374       // the Fortran 77 source code of ZROTG here:
00375       //
00376       // http://www.netlib.org/blas/zrotg.f
00377       //
00378       // I used the following rules to translate Fortran types and
00379       // intrinsic functions into C++:
00380       //
00381       // DOUBLE PRECISION -> MagnitudeType
00382       // DOUBLE COMPLEX -> ScalarType
00383       // CDABS -> STS::magnitude
00384       // DCMPLX -> ScalarType constructor (assuming that ScalarType
00385       //   is std::complex<MagnitudeType>)
00386       // DCONJG -> STS::conjugate
00387       // DSQRT -> STM::squareroot
00388       ScalarType alpha;
00389       MagnitudeType norm, scale;
00390 
00391       if (STS::magnitude (*ca) == STM::zero()) {
00392         *c = STM::zero();
00393         *s = STS::one();
00394         *ca = *cb;
00395       } else {
00396         scale = STS::magnitude (*ca) + STS::magnitude (*cb);
00397         { // I introduced temporaries into the translated BLAS code in
00398           // order to make the expression easier to read and also save a
00399           // few floating-point operations.
00400           const MagnitudeType ca_scaled =
00401             STS::magnitude (*ca / ScalarType(scale, STM::zero()));
00402           const MagnitudeType cb_scaled =
00403             STS::magnitude (*cb / ScalarType(scale, STM::zero()));
00404           norm = scale *
00405             STM::squareroot (ca_scaled*ca_scaled + cb_scaled*cb_scaled);
00406         }
00407         alpha = *ca / STS::magnitude (*ca);
00408         *c = STS::magnitude (*ca) / norm;
00409         *s = alpha * STS::conjugate (*cb) / norm;
00410         *ca = alpha * norm;
00411       }
00412     }
00413 
00414     // Implementation of real-arithmetic specialization.
00415     template<typename ScalarType>
00416     void
00417     GivensRotator<ScalarType, false>::
00418     ROTG (ScalarType* da,
00419           ScalarType* db,
00420           ScalarType* c,
00421           ScalarType* s) const
00422     {
00423       typedef ScalarTraits<ScalarType> STS;
00424 
00425       // This is a straightforward translation into C++ of the
00426       // reference BLAS' implementation of DROTG.  You can get
00427       // the Fortran 77 source code of DROTG here:
00428       //
00429       // http://www.netlib.org/blas/drotg.f
00430       //
00431       // I used the following rules to translate Fortran types and
00432       // intrinsic functions into C++:
00433       //
00434       // DOUBLE PRECISION -> ScalarType
00435       // DABS -> STS::magnitude
00436       // DSQRT -> STM::squareroot
00437       // DSIGN -> SIGN (see below)
00438       //
00439       // DSIGN(x,y) (the old DOUBLE PRECISION type-specific form of
00440       // the Fortran type-generic SIGN intrinsic) required special
00441       // translation, which we did in a separate utility function in
00442       // the specializaton of GivensRotator for real arithmetic.
00443       // (ROTG for complex arithmetic doesn't require this function.)
00444       // C99 provides a copysign() math library function, but we are
00445       // not able to rely on the existence of C99 functions here.
00446       ScalarType r, roe, scale, z;
00447 
00448       roe = *db;
00449       if (STS::magnitude (*da) > STS::magnitude (*db)) {
00450         roe = *da;
00451       }
00452       scale = STS::magnitude (*da) + STS::magnitude (*db);
00453       if (scale == STS::zero()) {
00454         *c = STS::one();
00455         *s = STS::zero();
00456         r = STS::zero();
00457         z = STS::zero();
00458       } else {
00459         // I introduced temporaries into the translated BLAS code in
00460         // order to make the expression easier to read and also save
00461         // a few floating-point operations.
00462         const ScalarType da_scaled = *da / scale;
00463         const ScalarType db_scaled = *db / scale;
00464         r = scale * STS::squareroot (da_scaled*da_scaled + db_scaled*db_scaled);
00465         r = SIGN (STS::one(), roe) * r;
00466         *c = *da / r;
00467         *s = *db / r;
00468         z = STS::one();
00469         if (STS::magnitude (*da) > STS::magnitude (*db)) {
00470           z = *s;
00471         }
00472         if (STS::magnitude (*db) >= STS::magnitude (*da) && *c != STS::zero()) {
00473           z = STS::one() / *c;
00474         }
00475       }
00476 
00477       *da = r;
00478       *db = z;
00479     }
00480 
00481     // Real-valued implementation of MagValue
00482     template<typename ScalarType>
00483     void 
00484     MagValue<ScalarType, false>::
00485     blas_dabs1(const ScalarType* a, ScalarType* ret) const 
00486     {
00487       *ret = Teuchos::ScalarTraits<ScalarType>::magnitude( *a );  
00488     }
00489 
00490     // Complex-valued implementation of MagValue
00491     template<typename ScalarType>
00492     void 
00493     MagValue<ScalarType, true>::
00494     blas_dabs1(const ScalarType* a, typename ScalarTraits<ScalarType>::magnitudeType* ret) const 
00495     {
00496       *ret = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->real());
00497       *ret += ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::magnitude(a->imag());
00498     }
00499 
00500   } // namespace details
00501 
00502   template<typename OrdinalType, typename ScalarType>
00503   void
00504   DefaultBLASImpl<OrdinalType, ScalarType>::
00505   ROTG (ScalarType* da,
00506         ScalarType* db,
00507         MagnitudeType* c,
00508         ScalarType* s) const
00509   {
00510     typedef ScalarTraits<ScalarType> STS;
00511     details::GivensRotator<ScalarType, STS::isComplex> rotator;
00512     rotator.ROTG (da, db, c, s);
00513   }
00514 
00515   template<typename OrdinalType, typename ScalarType>
00516   void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00517   {
00518     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00519     ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
00520     if (n <= 0) return;
00521     if (incx==1 && incy==1) {
00522       for(OrdinalType i=0; i<n; ++i) {
00523         ScalarType temp = *c*dx[i] + sconj*dy[i];
00524         dy[i] = *c*dy[i] - sconj*dx[i];
00525         dx[i] = temp;
00526       }
00527     }
00528     else {
00529       OrdinalType ix = 0, iy = 0;
00530       if (incx < izero) ix = (-n+1)*incx;
00531       if (incy < izero) iy = (-n+1)*incy;
00532       for(OrdinalType i=0; i<n; ++i) {
00533         ScalarType temp = *c*dx[ix] + sconj*dy[iy];
00534         dy[iy] = *c*dy[iy] - sconj*dx[ix];
00535         dx[ix] = temp;
00536         ix += incx;
00537         iy += incy;
00538       }
00539     }
00540   }
00541 
00542   template<typename OrdinalType, typename ScalarType>
00543   void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00544   {
00545     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00546     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00547     OrdinalType i, ix = izero;
00548 
00549     if ( n < ione || incx < ione )
00550       return;
00551 
00552     // Scale the vector.
00553     for(i = izero; i < n; i++)
00554     {
00555       x[ix] *= alpha;
00556       ix += incx;
00557     }
00558   } /* end SCAL */
00559 
00560   template<typename OrdinalType, typename ScalarType>
00561   void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00562   {
00563     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00564     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00565     OrdinalType i, ix = izero, iy = izero;
00566     if ( n > izero ) {
00567         // Set the initial indices (ix, iy).
00568         if (incx < izero) { ix = (-n+ione)*incx; }
00569         if (incy < izero) { iy = (-n+ione)*incy; }
00570 
00571         for(i = izero; i < n; i++)
00572           {
00573             y[iy] = x[ix];
00574             ix += incx;
00575             iy += incy;
00576           }
00577       }
00578   } /* end COPY */
00579 
00580   template<typename OrdinalType, typename ScalarType>
00581   template <typename alpha_type, typename x_type>
00582   void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00583   {
00584     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00585     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00586     OrdinalType i, ix = izero, iy = izero;
00587     if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
00588       {
00589         // Set the initial indices (ix, iy).
00590         if (incx < izero) { ix = (-n+ione)*incx; }
00591         if (incy < izero) { iy = (-n+ione)*incy; }
00592 
00593         for(i = izero; i < n; i++)
00594           {
00595             y[iy] += alpha * x[ix];
00596             ix += incx;
00597             iy += incy;
00598           }
00599       }
00600   } /* end AXPY */
00601 
00602   template<typename OrdinalType, typename ScalarType>
00603   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00604   {
00605     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00606     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00607     typename ScalarTraits<ScalarType>::magnitudeType temp, result =
00608       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00609     OrdinalType i, ix = izero;
00610 
00611     if ( n < ione || incx < ione )
00612       return result;
00613 
00614     details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
00615     for (i = izero; i < n; i++)
00616       {
00617         mval.blas_dabs1( &x[ix], &temp );
00618         result += temp;
00619         ix += incx;
00620       }
00621    
00622     return result;
00623   } /* end ASUM */
00624 
00625   template<typename OrdinalType, typename ScalarType>
00626   template <typename x_type, typename y_type>
00627   ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
00628   {
00629     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00630     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00631     ScalarType result = ScalarTraits<ScalarType>::zero();
00632     OrdinalType i, ix = izero, iy = izero;
00633     if( n > izero )
00634       {
00635         // Set the initial indices (ix,iy).
00636         if (incx < izero) { ix = (-n+ione)*incx; }
00637         if (incy < izero) { iy = (-n+ione)*incy; }
00638 
00639         for(i = izero; i < n; i++)
00640           {
00641             result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
00642             ix += incx;
00643             iy += incy;
00644           }
00645       }
00646     return result;
00647   } /* end DOT */
00648 
00649   template<typename OrdinalType, typename ScalarType>
00650   typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00651   {
00652     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00653     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00654     typename ScalarTraits<ScalarType>::magnitudeType result =
00655       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00656     OrdinalType i, ix = izero;
00657 
00658     if ( n < ione || incx < ione )
00659       return result;
00660 
00661     for(i = izero; i < n; i++)
00662       {
00663         result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]);
00664         ix += incx;
00665       }
00666     result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result);
00667     return result;
00668   } /* end NRM2 */
00669 
00670   template<typename OrdinalType, typename ScalarType>
00671   OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00672   {
00673     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00674     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00675     OrdinalType result = izero, ix = izero, i;
00676     typename ScalarTraits<ScalarType>::magnitudeType curval =
00677       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00678     typename ScalarTraits<ScalarType>::magnitudeType maxval =
00679       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00680 
00681     if ( n < ione || incx < ione )
00682       return result;
00683   
00684     details::MagValue<ScalarType, ScalarTraits<ScalarType>::isComplex> mval;
00685 
00686     mval.blas_dabs1( &x[ix], &maxval );
00687     ix += incx;
00688     for(i = ione; i < n; i++)
00689       {
00690         mval.blas_dabs1( &x[ix], &curval );
00691         if(curval > maxval)
00692           {
00693             result = i;
00694             maxval = curval;
00695           }
00696         ix += incx;
00697       }
00698 
00699     return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
00700   } /* end IAMAX */
00701 
00702 //------------------------------------------------------------------------------------------
00703 //      LEVEL 2 BLAS ROUTINES
00704 //------------------------------------------------------------------------------------------
00705   template<typename OrdinalType, typename ScalarType>
00706   template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00707   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
00708   {
00709     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00710     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00711     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00712     beta_type beta_zero = ScalarTraits<beta_type>::zero();
00713     x_type x_zero = ScalarTraits<x_type>::zero();
00714     ScalarType y_zero = ScalarTraits<ScalarType>::zero();
00715     beta_type beta_one = ScalarTraits<beta_type>::one();
00716     bool noConj = true;
00717     bool BadArgument = false;
00718 
00719     // Quick return if there is nothing to do!
00720     if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
00721 
00722     // Otherwise, we need to check the argument list.
00723     if( m < izero ) {
00724         std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
00725         BadArgument = true;
00726     }
00727     if( n < izero ) {
00728         std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
00729         BadArgument = true;
00730     }
00731     if( lda < m ) {
00732         std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
00733         BadArgument = true;
00734     }
00735     if( incx == izero ) {
00736         std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00737         BadArgument = true;
00738     }
00739     if( incy == izero ) {
00740         std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00741         BadArgument = true;
00742     }
00743 
00744     if(!BadArgument) {
00745       OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
00746       OrdinalType kx = izero, ky = izero;
00747       ScalarType temp;
00748 
00749       // Determine the lengths of the vectors x and y.
00750       if(ETranspChar[trans] == 'N') {
00751         lenx = n;
00752         leny = m;
00753       } else {
00754         lenx = m;
00755         leny = n;
00756       }
00757 
00758       // Determine if this is a conjugate tranpose
00759       noConj = (ETranspChar[trans] == 'T');
00760 
00761       // Set the starting pointers for the vectors x and y if incx/y < 0.
00762       if (incx < izero ) { kx =  (ione - lenx)*incx; }
00763       if (incy < izero ) { ky =  (ione - leny)*incy; }
00764 
00765       // Form y = beta*y
00766       ix = kx; iy = ky;
00767       if(beta != beta_one) {
00768         if (incy == ione) {
00769           if (beta == beta_zero) {
00770             for(i = izero; i < leny; i++) { y[i] = y_zero; }
00771           } else {
00772             for(i = izero; i < leny; i++) { y[i] *= beta; }
00773           }
00774         } else {
00775           if (beta == beta_zero) {
00776             for(i = izero; i < leny; i++) {
00777               y[iy] = y_zero;
00778               iy += incy;
00779             }
00780           } else {
00781             for(i = izero; i < leny; i++) {
00782               y[iy] *= beta;
00783               iy += incy;
00784             }
00785           }
00786         }
00787       }
00788 
00789       // Return if we don't have to do anything more.
00790       if(alpha == alpha_zero) { return; }
00791 
00792       if( ETranspChar[trans] == 'N' ) {
00793         // Form y = alpha*A*y
00794         jx = kx;
00795         if (incy == ione) {
00796           for(j = izero; j < n; j++) {
00797             if (x[jx] != x_zero) {
00798               temp = alpha*x[jx];
00799               for(i = izero; i < m; i++) {
00800                 y[i] += temp*A[j*lda + i];
00801               }
00802             }
00803             jx += incx;
00804           }
00805         } else {
00806           for(j = izero; j < n; j++) {
00807             if (x[jx] != x_zero) {
00808               temp = alpha*x[jx];
00809               iy = ky;
00810               for(i = izero; i < m; i++) {
00811                 y[iy] += temp*A[j*lda + i];
00812                 iy += incy;
00813               }
00814             }
00815             jx += incx;
00816           }
00817         }
00818       } else {
00819         jy = ky;
00820         if (incx == ione) {
00821           for(j = izero; j < n; j++) {
00822             temp = y_zero;
00823             if ( noConj ) {
00824               for(i = izero; i < m; i++) {
00825                 temp += A[j*lda + i]*x[i];
00826               }
00827             } else {
00828               for(i = izero; i < m; i++) {
00829                 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00830               }
00831             }
00832             y[jy] += alpha*temp;
00833             jy += incy;
00834           }
00835         } else {
00836           for(j = izero; j < n; j++) {
00837             temp = y_zero;
00838             ix = kx;
00839             if ( noConj ) {
00840               for (i = izero; i < m; i++) {
00841                 temp += A[j*lda + i]*x[ix];
00842                 ix += incx;
00843               }
00844             } else {
00845               for (i = izero; i < m; i++) {
00846                 temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00847                 ix += incx;
00848               }
00849             }
00850             y[jy] += alpha*temp;
00851             jy += incy;
00852           }
00853         }
00854       }
00855     } /* if (!BadArgument) */
00856   } /* end GEMV */
00857 
00858  template<typename OrdinalType, typename ScalarType>
00859  template <typename A_type>
00860  void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00861   {
00862     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00863     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00864     ScalarType zero = ScalarTraits<ScalarType>::zero();
00865     bool BadArgument = false;
00866     bool noConj = true;
00867 
00868     // Quick return if there is nothing to do!
00869     if( n == izero ){ return; }
00870 
00871     // Otherwise, we need to check the argument list.
00872     if( n < izero ) {
00873       std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
00874       BadArgument = true;
00875     }
00876     if( lda < n ) {
00877       std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
00878       BadArgument = true;
00879     }
00880     if( incx == izero ) {
00881       std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00882       BadArgument = true;
00883     }
00884 
00885     if(!BadArgument) {
00886       OrdinalType i, j, ix, jx, kx = izero;
00887       ScalarType temp;
00888       bool noUnit = (EDiagChar[diag] == 'N');
00889 
00890       // Determine if this is a conjugate tranpose
00891       noConj = (ETranspChar[trans] == 'T');
00892 
00893       // Set the starting pointer for the vector x if incx < 0.
00894       if (incx < izero) { kx = (-n+ione)*incx; }
00895 
00896       // Start the operations for a nontransposed triangular matrix
00897       if (ETranspChar[trans] == 'N') {
00898         /* Compute x = A*x */
00899         if (EUploChar[uplo] == 'U') {
00900           /* A is an upper triangular matrix */
00901           if (incx == ione) {
00902             for (j=izero; j<n; j++) {
00903               if (x[j] != zero) {
00904                 temp = x[j];
00905                 for (i=izero; i<j; i++) {
00906                   x[i] += temp*A[j*lda + i];
00907                 }
00908                 if ( noUnit )
00909                   x[j] *= A[j*lda + j];
00910               }
00911             }
00912           } else {
00913             jx = kx;
00914             for (j=izero; j<n; j++) {
00915               if (x[jx] != zero) {
00916                 temp = x[jx];
00917                 ix = kx;
00918                 for (i=izero; i<j; i++) {
00919                   x[ix] += temp*A[j*lda + i];
00920                   ix += incx;
00921                 }
00922                 if ( noUnit )
00923                   x[jx] *= A[j*lda + j];
00924               }
00925               jx += incx;
00926             }
00927           } /* if (incx == ione) */
00928         } else { /* A is a lower triangular matrix */
00929           if (incx == ione) {
00930             for (j=n-ione; j>-ione; j--) {
00931               if (x[j] != zero) {
00932                 temp = x[j];
00933                 for (i=n-ione; i>j; i--) {
00934                   x[i] += temp*A[j*lda + i];
00935                 }
00936                 if ( noUnit )
00937                   x[j] *= A[j*lda + j];
00938               }
00939             }
00940           } else {
00941             kx += (n-ione)*incx;
00942             jx = kx;
00943             for (j=n-ione; j>-ione; j--) {
00944               if (x[jx] != zero) {
00945                 temp = x[jx];
00946                 ix = kx;
00947                 for (i=n-ione; i>j; i--) {
00948                   x[ix] += temp*A[j*lda + i];
00949                   ix -= incx;
00950                 }
00951                 if ( noUnit )
00952                   x[jx] *= A[j*lda + j];
00953               }
00954               jx -= incx;
00955             }
00956           }
00957         } /* if (EUploChar[uplo]=='U') */
00958       } else { /* A is transposed/conjugated */
00959         /* Compute x = A'*x */
00960         if (EUploChar[uplo]=='U') {
00961           /* A is an upper triangular matrix */
00962           if (incx == ione) {
00963             for (j=n-ione; j>-ione; j--) {
00964               temp = x[j];
00965               if ( noConj ) {
00966                 if ( noUnit )
00967                   temp *= A[j*lda + j];
00968                 for (i=j-ione; i>-ione; i--) {
00969                   temp += A[j*lda + i]*x[i];
00970                 }
00971               } else {
00972                 if ( noUnit )
00973                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00974                 for (i=j-ione; i>-ione; i--) {
00975                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
00976                 }
00977               }
00978               x[j] = temp;
00979             }
00980           } else {
00981             jx = kx + (n-ione)*incx;
00982             for (j=n-ione; j>-ione; j--) {
00983               temp = x[jx];
00984               ix = jx;
00985               if ( noConj ) {
00986                 if ( noUnit )
00987                   temp *= A[j*lda + j];
00988                 for (i=j-ione; i>-ione; i--) {
00989                   ix -= incx;
00990                   temp += A[j*lda + i]*x[ix];
00991                 }
00992               } else {
00993                 if ( noUnit )
00994                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
00995                 for (i=j-ione; i>-ione; i--) {
00996                   ix -= incx;
00997                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
00998                 }
00999               }
01000               x[jx] = temp;
01001               jx -= incx;
01002             }
01003           }
01004         } else {
01005           /* A is a lower triangular matrix */
01006           if (incx == ione) {
01007             for (j=izero; j<n; j++) {
01008               temp = x[j];
01009               if ( noConj ) {
01010                 if ( noUnit )
01011                   temp *= A[j*lda + j];
01012                 for (i=j+ione; i<n; i++) {
01013                   temp += A[j*lda + i]*x[i];
01014                 }
01015               } else {
01016                 if ( noUnit )
01017                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
01018                 for (i=j+ione; i<n; i++) {
01019                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[i];
01020                 }
01021               }
01022               x[j] = temp;
01023             }
01024           } else {
01025             jx = kx;
01026             for (j=izero; j<n; j++) {
01027               temp = x[jx];
01028               ix = jx;
01029               if ( noConj ) {
01030                 if ( noUnit )
01031                   temp *= A[j*lda + j];
01032                 for (i=j+ione; i<n; i++) {
01033                   ix += incx;
01034                   temp += A[j*lda + i]*x[ix];
01035                 }
01036               } else {
01037                 if ( noUnit )
01038                   temp *= ScalarTraits<A_type>::conjugate(A[j*lda + j]);
01039                 for (i=j+ione; i<n; i++) {
01040                   ix += incx;
01041                   temp += ScalarTraits<A_type>::conjugate(A[j*lda + i])*x[ix];
01042                 }
01043               }
01044               x[jx] = temp;
01045               jx += incx;
01046             }
01047           }
01048         } /* if (EUploChar[uplo]=='U') */
01049       } /* if (ETranspChar[trans]=='N') */
01050     } /* if (!BadArgument) */
01051   } /* end TRMV */
01052 
01053   template<typename OrdinalType, typename ScalarType>
01054   template <typename alpha_type, typename x_type, typename y_type>
01055   void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
01056   {
01057     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01058     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01059     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01060     y_type y_zero = ScalarTraits<y_type>::zero();
01061     bool BadArgument = false;
01062 
01063     // Quick return if there is nothing to do!
01064     if( m == izero || n == izero || alpha == alpha_zero ){ return; }
01065 
01066     // Otherwise, we need to check the argument list.
01067     if( m < izero ) {
01068         std::cout << "BLAS::GER Error: M == " << m << std::endl;
01069         BadArgument = true;
01070     }
01071     if( n < izero ) {
01072         std::cout << "BLAS::GER Error: N == " << n << std::endl;
01073         BadArgument = true;
01074     }
01075     if( lda < m ) {
01076         std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
01077         BadArgument = true;
01078     }
01079     if( incx == 0 ) {
01080         std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
01081         BadArgument = true;
01082     }
01083     if( incy == 0 ) {
01084         std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
01085         BadArgument = true;
01086     }
01087 
01088     if(!BadArgument) {
01089       OrdinalType i, j, ix, jy = izero, kx = izero;
01090       ScalarType temp;
01091 
01092       // Set the starting pointers for the vectors x and y if incx/y < 0.
01093       if (incx < izero) { kx = (-m+ione)*incx; }
01094       if (incy < izero) { jy = (-n+ione)*incy; }
01095 
01096       // Start the operations for incx == 1
01097       if( incx == ione ) {
01098         for( j=izero; j<n; j++ ) {
01099           if ( y[jy] != y_zero ) {
01100             temp = alpha*y[jy];
01101             for ( i=izero; i<m; i++ ) {
01102               A[j*lda + i] += x[i]*temp;
01103             }
01104           }
01105           jy += incy;
01106         }
01107       }
01108       else { // Start the operations for incx != 1
01109         for( j=izero; j<n; j++ ) {
01110           if ( y[jy] != y_zero ) {
01111             temp = alpha*y[jy];
01112             ix = kx;
01113             for( i=izero; i<m; i++ ) {
01114               A[j*lda + i] += x[ix]*temp;
01115               ix += incx;
01116             }
01117           }
01118           jy += incy;
01119         }
01120       }
01121     } /* if(!BadArgument) */
01122   } /* end GER */
01123 
01124 //------------------------------------------------------------------------------------------
01125 //      LEVEL 3 BLAS ROUTINES
01126 //------------------------------------------------------------------------------------------
01127 
01128   template<typename OrdinalType, typename ScalarType>
01129   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01130   void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01131   {
01132 
01133     typedef TypeNameTraits<OrdinalType> OTNT;
01134     typedef TypeNameTraits<ScalarType> STNT;
01135 
01136     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01137     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01138     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01139     B_type B_zero = ScalarTraits<B_type>::zero();
01140     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01141     beta_type beta_one = ScalarTraits<beta_type>::one();
01142     OrdinalType i, j, p;
01143     OrdinalType NRowA = m, NRowB = k;
01144     ScalarType temp;
01145     bool BadArgument = false;
01146     bool conjA = false, conjB = false;
01147 
01148     // Change dimensions of matrix if either matrix is transposed
01149     if( !(ETranspChar[transa]=='N') ) {
01150       NRowA = k;
01151     }
01152     if( !(ETranspChar[transb]=='N') ) {
01153       NRowB = n;
01154     }
01155 
01156     // Quick return if there is nothing to do!
01157     if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
01158     if( m < izero ) {
01159       std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
01160       BadArgument = true;
01161     }
01162     if( n < izero ) {
01163       std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
01164       BadArgument = true;
01165     }
01166     if( k < izero ) {
01167       std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
01168       BadArgument = true;
01169     }
01170     if( lda < NRowA ) {
01171       std::cout << "BLAS::GEMM Error: LDA < "<<NRowA<<std::endl;
01172       BadArgument = true;
01173     }
01174     if( ldb < NRowB ) {
01175       std::cout << "BLAS::GEMM Error: LDB < "<<NRowB<<std::endl;
01176       BadArgument = true;
01177     }
01178      if( ldc < m ) {
01179       std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
01180       BadArgument = true;
01181     }
01182 
01183     if(!BadArgument) {
01184 
01185       // Determine if this is a conjugate tranpose
01186       conjA = (ETranspChar[transa] == 'C');
01187       conjB = (ETranspChar[transb] == 'C');
01188 
01189       // Only need to scale the resulting matrix C.
01190       if( alpha == alpha_zero ) {
01191         if( beta == beta_zero ) {
01192           for (j=izero; j<n; j++) {
01193             for (i=izero; i<m; i++) {
01194               C[j*ldc + i] = C_zero;
01195             }
01196           }
01197         } else {
01198           for (j=izero; j<n; j++) {
01199             for (i=izero; i<m; i++) {
01200               C[j*ldc + i] *= beta;
01201             }
01202           }
01203         }
01204         return;
01205       }
01206       //
01207       // Now start the operations.
01208       //
01209       if ( ETranspChar[transb]=='N' ) {
01210         if ( ETranspChar[transa]=='N' ) {
01211           // Compute C = alpha*A*B + beta*C
01212           for (j=izero; j<n; j++) {
01213             if( beta == beta_zero ) {
01214               for (i=izero; i<m; i++){
01215                 C[j*ldc + i] = C_zero;
01216               }
01217             } else if( beta != beta_one ) {
01218               for (i=izero; i<m; i++){
01219                 C[j*ldc + i] *= beta;
01220               }
01221             }
01222             for (p=izero; p<k; p++){
01223               if (B[j*ldb + p] != B_zero ){
01224                 temp = alpha*B[j*ldb + p];
01225                 for (i=izero; i<m; i++) {
01226                   C[j*ldc + i] += temp*A[p*lda + i];
01227                 }
01228               }
01229             }
01230           }
01231         } else if ( conjA ) {
01232           // Compute C = alpha*conj(A')*B + beta*C
01233           for (j=izero; j<n; j++) {
01234             for (i=izero; i<m; i++) {
01235               temp = C_zero;
01236               for (p=izero; p<k; p++) {
01237                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])*B[j*ldb + p];
01238               }
01239               if (beta == beta_zero) {
01240                 C[j*ldc + i] = alpha*temp;
01241               } else {
01242                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01243               }
01244             }
01245           }
01246         } else {
01247           // Compute C = alpha*A'*B + beta*C
01248           for (j=izero; j<n; j++) {
01249             for (i=izero; i<m; i++) {
01250               temp = C_zero;
01251               for (p=izero; p<k; p++) {
01252                 temp += A[i*lda + p]*B[j*ldb + p];
01253               }
01254               if (beta == beta_zero) {
01255                 C[j*ldc + i] = alpha*temp;
01256               } else {
01257                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01258               }
01259             }
01260           }
01261         }
01262       } else if ( ETranspChar[transa]=='N' ) {
01263         if ( conjB ) {
01264           // Compute C = alpha*A*conj(B') + beta*C
01265           for (j=izero; j<n; j++) {
01266             if (beta == beta_zero) {
01267               for (i=izero; i<m; i++) {
01268                 C[j*ldc + i] = C_zero;
01269               }
01270             } else if ( beta != beta_one ) {
01271               for (i=izero; i<m; i++) {
01272                 C[j*ldc + i] *= beta;
01273               }
01274             }
01275             for (p=izero; p<k; p++) {
01276               if (B[p*ldb + j] != B_zero) {
01277                 temp = alpha*ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01278                 for (i=izero; i<m; i++) {
01279                   C[j*ldc + i] += temp*A[p*lda + i];
01280                 }
01281               }
01282             }
01283           }
01284         } else {
01285           // Compute C = alpha*A*B' + beta*C
01286           for (j=izero; j<n; j++) {
01287             if (beta == beta_zero) {
01288               for (i=izero; i<m; i++) {
01289                 C[j*ldc + i] = C_zero;
01290               }
01291             } else if ( beta != beta_one ) {
01292               for (i=izero; i<m; i++) {
01293                 C[j*ldc + i] *= beta;
01294               }
01295             }
01296             for (p=izero; p<k; p++) {
01297               if (B[p*ldb + j] != B_zero) {
01298                 temp = alpha*B[p*ldb + j];
01299                 for (i=izero; i<m; i++) {
01300                   C[j*ldc + i] += temp*A[p*lda + i];
01301                 }
01302               }
01303             }
01304           }
01305         }
01306       } else if ( conjA ) {
01307         if ( conjB ) {
01308           // Compute C = alpha*conj(A')*conj(B') + beta*C
01309           for (j=izero; j<n; j++) {
01310             for (i=izero; i<m; i++) {
01311               temp = C_zero;
01312               for (p=izero; p<k; p++) {
01313                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01314                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01315               }
01316               if (beta == beta_zero) {
01317                 C[j*ldc + i] = alpha*temp;
01318               } else {
01319                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01320               }
01321             }
01322           }
01323         } else {
01324           // Compute C = alpha*conj(A')*B' + beta*C
01325           for (j=izero; j<n; j++) {
01326             for (i=izero; i<m; i++) {
01327               temp = C_zero;
01328               for (p=izero; p<k; p++) {
01329                 temp += ScalarTraits<A_type>::conjugate(A[i*lda + p])
01330                       * B[p*ldb + j];
01331               }
01332               if (beta == beta_zero) {
01333                 C[j*ldc + i] = alpha*temp;
01334               } else {
01335                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01336               }
01337             }
01338           }
01339         }
01340      } else {
01341        if ( conjB ) {
01342          // Compute C = alpha*A'*conj(B') + beta*C
01343          for (j=izero; j<n; j++) {
01344             for (i=izero; i<m; i++) {
01345               temp = C_zero;
01346               for (p=izero; p<k; p++) {
01347                 temp += A[i*lda + p]
01348                       * ScalarTraits<B_type>::conjugate(B[p*ldb + j]);
01349               }
01350               if (beta == beta_zero) {
01351                 C[j*ldc + i] = alpha*temp;
01352               } else {
01353                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01354               }
01355             }
01356           }
01357         } else {
01358           // Compute C = alpha*A'*B' + beta*C
01359           for (j=izero; j<n; j++) {
01360             for (i=izero; i<m; i++) {
01361               temp = C_zero;
01362               for (p=izero; p<k; p++) {
01363                 temp += A[i*lda + p]*B[p*ldb + j];
01364               }
01365               if (beta == beta_zero) {
01366                 C[j*ldc + i] = alpha*temp;
01367               } else {
01368                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01369               }
01370             }
01371           }
01372         } // end if (ETranspChar[transa]=='N') ...
01373       } // end if (ETranspChar[transb]=='N') ...
01374     } // end if (!BadArgument) ...
01375   } // end of GEMM
01376 
01377 
01378   template<typename OrdinalType, typename ScalarType>
01379   template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01380   void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01381   {
01382     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01383     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01384     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01385     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01386     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01387     beta_type beta_one = ScalarTraits<beta_type>::one();
01388     OrdinalType i, j, k, NRowA = m;
01389     ScalarType temp1, temp2;
01390     bool BadArgument = false;
01391     bool Upper = (EUploChar[uplo] == 'U');
01392     if (ESideChar[side] == 'R') { NRowA = n; }
01393 
01394     // Quick return.
01395     if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
01396     if( m < izero ) {
01397       std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
01398       BadArgument = true; }
01399     if( n < izero ) {
01400       std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
01401       BadArgument = true; }
01402     if( lda < NRowA ) {
01403       std::cout << "BLAS::SYMM Error: LDA < "<<NRowA<<std::endl;
01404       BadArgument = true; }
01405     if( ldb < m ) {
01406       std::cout << "BLAS::SYMM Error: LDB < MAX(1,M)"<<std::endl;
01407       BadArgument = true; }
01408     if( ldc < m ) {
01409       std::cout << "BLAS::SYMM Error: LDC < MAX(1,M)"<<std::endl;
01410       BadArgument = true; }
01411 
01412     if(!BadArgument) {
01413 
01414       // Only need to scale C and return.
01415       if (alpha == alpha_zero) {
01416         if (beta == beta_zero ) {
01417           for (j=izero; j<n; j++) {
01418             for (i=izero; i<m; i++) {
01419               C[j*ldc + i] = C_zero;
01420             }
01421           }
01422         } else {
01423           for (j=izero; j<n; j++) {
01424             for (i=izero; i<m; i++) {
01425               C[j*ldc + i] *= beta;
01426             }
01427           }
01428         }
01429         return;
01430       }
01431 
01432       if ( ESideChar[side] == 'L') {
01433         // Compute C = alpha*A*B + beta*C
01434 
01435         if (Upper) {
01436           // The symmetric part of A is stored in the upper triangular part of the matrix.
01437           for (j=izero; j<n; j++) {
01438             for (i=izero; i<m; i++) {
01439               temp1 = alpha*B[j*ldb + i];
01440               temp2 = C_zero;
01441               for (k=izero; k<i; k++) {
01442                 C[j*ldc + k] += temp1*A[i*lda + k];
01443                 temp2 += B[j*ldb + k]*A[i*lda + k];
01444               }
01445               if (beta == beta_zero) {
01446                 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01447               } else {
01448                 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01449               }
01450             }
01451           }
01452         } else {
01453           // The symmetric part of A is stored in the lower triangular part of the matrix.
01454           for (j=izero; j<n; j++) {
01455             for (i=m-ione; i>-ione; i--) {
01456               temp1 = alpha*B[j*ldb + i];
01457               temp2 = C_zero;
01458               for (k=i+ione; k<m; k++) {
01459                 C[j*ldc + k] += temp1*A[i*lda + k];
01460                 temp2 += B[j*ldb + k]*A[i*lda + k];
01461               }
01462               if (beta == beta_zero) {
01463                 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01464               } else {
01465                 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01466               }
01467             }
01468           }
01469         }
01470       } else {
01471         // Compute C = alpha*B*A + beta*C.
01472         for (j=izero; j<n; j++) {
01473           temp1 = alpha*A[j*lda + j];
01474           if (beta == beta_zero) {
01475             for (i=izero; i<m; i++) {
01476               C[j*ldc + i] = temp1*B[j*ldb + i];
01477             }
01478           } else {
01479             for (i=izero; i<m; i++) {
01480               C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01481             }
01482           }
01483           for (k=izero; k<j; k++) {
01484             if (Upper) {
01485               temp1 = alpha*A[j*lda + k];
01486             } else {
01487               temp1 = alpha*A[k*lda + j];
01488             }
01489             for (i=izero; i<m; i++) {
01490               C[j*ldc + i] += temp1*B[k*ldb + i];
01491             }
01492           }
01493           for (k=j+ione; k<n; k++) {
01494             if (Upper) {
01495               temp1 = alpha*A[k*lda + j];
01496             } else {
01497               temp1 = alpha*A[j*lda + k];
01498             }
01499             for (i=izero; i<m; i++) {
01500               C[j*ldc + i] += temp1*B[k*ldb + i];
01501             }
01502           }
01503         }
01504       } // end if (ESideChar[side]=='L') ...
01505     } // end if(!BadArgument) ...
01506 } // end SYMM
01507 
01508   template<typename OrdinalType, typename ScalarType>
01509   template <typename alpha_type, typename A_type, typename beta_type>
01510   void DefaultBLASImpl<OrdinalType, ScalarType>::SYRK(EUplo uplo, ETransp trans, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01511   {
01512     typedef TypeNameTraits<OrdinalType> OTNT;
01513     typedef TypeNameTraits<ScalarType> STNT;
01514 
01515     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01516     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01517     beta_type beta_zero = ScalarTraits<beta_type>::zero();
01518     beta_type beta_one = ScalarTraits<beta_type>::one();
01519     A_type temp, A_zero = ScalarTraits<A_type>::zero();
01520     ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01521     OrdinalType i, j, l, NRowA = n;
01522     bool BadArgument = false;
01523     bool Upper = (EUploChar[uplo] == 'U');
01524 
01525     TEUCHOS_TEST_FOR_EXCEPTION(
01526       Teuchos::ScalarTraits<ScalarType>::isComplex
01527       && (trans == CONJ_TRANS),
01528       std::logic_error,
01529             "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::SYRK()"
01530             " does not support CONJ_TRANS for complex data types."
01531       );
01532 
01533     // Change dimensions of A matrix is transposed
01534     if( !(ETranspChar[trans]=='N') ) {
01535       NRowA = k;
01536     }
01537 
01538     // Quick return.
01539     if ( n==izero ) { return; }
01540     if ( ( (alpha==alpha_zero) || (k==izero) )  && (beta==beta_one) ) { return; }
01541     if( n < izero ) {
01542       std::cout << "BLAS::SYRK Error: N == "<< n <<std::endl;
01543       BadArgument = true; }
01544     if( k < izero ) {
01545       std::cout << "BLAS::SYRK Error: K == "<< k <<std::endl;
01546       BadArgument = true; }
01547     if( lda < NRowA ) {
01548       std::cout << "BLAS::SYRK Error: LDA < "<<NRowA<<std::endl;
01549       BadArgument = true; }
01550     if( ldc < n ) {
01551       std::cout << "BLAS::SYRK Error: LDC < MAX(1,N)"<<std::endl;
01552       BadArgument = true; }
01553 
01554     if(!BadArgument) {
01555 
01556       // Scale C when alpha is zero
01557       if (alpha == alpha_zero) {
01558         if (Upper) {
01559           if (beta==beta_zero) {
01560             for (j=izero; j<n; j++) {
01561               for (i=izero; i<=j; i++) {
01562                 C[j*ldc + i] = C_zero;
01563               }
01564             }
01565           }
01566           else {
01567             for (j=izero; j<n; j++) {
01568               for (i=izero; i<=j; i++) {
01569                 C[j*ldc + i] *= beta;
01570               }
01571             }
01572           }
01573         }
01574         else {
01575           if (beta==beta_zero) {
01576             for (j=izero; j<n; j++) {
01577               for (i=j; i<n; i++) {
01578                 C[j*ldc + i] = C_zero;
01579               }
01580             }
01581           }
01582           else {
01583             for (j=izero; j<n; j++) {
01584               for (i=j; i<n; i++) {
01585                 C[j*ldc + i] *= beta;
01586               }
01587             }
01588           }
01589         }
01590         return;
01591       }
01592 
01593       // Now we can start the computation
01594 
01595       if ( ETranspChar[trans]=='N' ) {
01596 
01597         // Form C <- alpha*A*A' + beta*C
01598         if (Upper) {
01599           for (j=izero; j<n; j++) {
01600             if (beta==beta_zero) {
01601               for (i=izero; i<=j; i++) {
01602                 C[j*ldc + i] = C_zero;
01603               }
01604             }
01605             else if (beta!=beta_one) {
01606               for (i=izero; i<=j; i++) {
01607                 C[j*ldc + i] *= beta;
01608               }
01609             }
01610             for (l=izero; l<k; l++) {
01611               if (A[l*lda + j] != A_zero) {
01612                 temp = alpha*A[l*lda + j];
01613                 for (i = izero; i <=j; i++) {
01614                   C[j*ldc + i] += temp*A[l*lda + i];
01615                 }
01616               }
01617             }
01618           }
01619         }
01620         else {
01621           for (j=izero; j<n; j++) {
01622             if (beta==beta_zero) {
01623               for (i=j; i<n; i++) {
01624                 C[j*ldc + i] = C_zero;
01625               }
01626             }
01627             else if (beta!=beta_one) {
01628               for (i=j; i<n; i++) {
01629                 C[j*ldc + i] *= beta;
01630               }
01631             }
01632             for (l=izero; l<k; l++) {
01633               if (A[l*lda + j] != A_zero) {
01634                 temp = alpha*A[l*lda + j];
01635                 for (i=j; i<n; i++) {
01636                   C[j*ldc + i] += temp*A[l*lda + i];
01637                 }
01638               }
01639             }
01640           }
01641         }
01642       }
01643       else {
01644 
01645         // Form C <- alpha*A'*A + beta*C
01646         if (Upper) {
01647           for (j=izero; j<n; j++) {
01648             for (i=izero; i<=j; i++) {
01649               temp = A_zero;
01650               for (l=izero; l<k; l++) {
01651                 temp += A[i*lda + l]*A[j*lda + l];
01652               }
01653               if (beta==beta_zero) {
01654                 C[j*ldc + i] = alpha*temp;
01655               }
01656               else {
01657                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01658               }
01659             }
01660           }
01661         }
01662         else {
01663           for (j=izero; j<n; j++) {
01664             for (i=j; i<n; i++) {
01665               temp = A_zero;
01666               for (l=izero; l<k; ++l) {
01667                 temp += A[i*lda + l]*A[j*lda + l];
01668               }
01669               if (beta==beta_zero) {
01670                 C[j*ldc + i] = alpha*temp;
01671               }
01672               else {
01673                 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01674               }
01675             }
01676           }
01677         }
01678       }
01679     } /* if (!BadArgument) */
01680   } /* END SYRK */
01681 
01682   template<typename OrdinalType, typename ScalarType>
01683   template <typename alpha_type, typename A_type>
01684   void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01685   {
01686     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01687     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01688     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01689     A_type A_zero = ScalarTraits<A_type>::zero();
01690     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01691     ScalarType one = ScalarTraits<ScalarType>::one();
01692     OrdinalType i, j, k, NRowA = m;
01693     ScalarType temp;
01694     bool BadArgument = false;
01695     bool LSide = (ESideChar[side] == 'L');
01696     bool noUnit = (EDiagChar[diag] == 'N');
01697     bool Upper = (EUploChar[uplo] == 'U');
01698     bool noConj = (ETranspChar[transa] == 'T');
01699 
01700     if(!LSide) { NRowA = n; }
01701 
01702     // Quick return.
01703     if (n==izero || m==izero) { return; }
01704     if( m < izero ) {
01705       std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01706       BadArgument = true; }
01707     if( n < izero ) {
01708       std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01709       BadArgument = true; }
01710     if( lda < NRowA ) {
01711       std::cout << "BLAS::TRMM Error: LDA < "<<NRowA<<std::endl;
01712       BadArgument = true; }
01713     if( ldb < m ) {
01714       std::cout << "BLAS::TRMM Error: LDB < MAX(1,M)"<<std::endl;
01715       BadArgument = true; }
01716 
01717     if(!BadArgument) {
01718 
01719       // B only needs to be zeroed out.
01720       if( alpha == alpha_zero ) {
01721         for( j=izero; j<n; j++ ) {
01722           for( i=izero; i<m; i++ ) {
01723             B[j*ldb + i] = B_zero;
01724           }
01725         }
01726         return;
01727       }
01728 
01729       //  Start the computations.
01730       if ( LSide ) {
01731         // A is on the left side of B.
01732 
01733         if ( ETranspChar[transa]=='N' ) {
01734           // Compute B = alpha*A*B
01735 
01736           if ( Upper ) {
01737             // A is upper triangular
01738             for( j=izero; j<n; j++ ) {
01739               for( k=izero; k<m; k++) {
01740                 if ( B[j*ldb + k] != B_zero ) {
01741                   temp = alpha*B[j*ldb + k];
01742                   for( i=izero; i<k; i++ ) {
01743                     B[j*ldb + i] += temp*A[k*lda + i];
01744                   }
01745                   if ( noUnit )
01746                     temp *=A[k*lda + k];
01747                   B[j*ldb + k] = temp;
01748                 }
01749               }
01750             }
01751           } else {
01752             // A is lower triangular
01753             for( j=izero; j<n; j++ ) {
01754               for( k=m-ione; k>-ione; k-- ) {
01755                 if( B[j*ldb + k] != B_zero ) {
01756                   temp = alpha*B[j*ldb + k];
01757                   B[j*ldb + k] = temp;
01758                   if ( noUnit )
01759                     B[j*ldb + k] *= A[k*lda + k];
01760                   for( i=k+ione; i<m; i++ ) {
01761                     B[j*ldb + i] += temp*A[k*lda + i];
01762                   }
01763                 }
01764               }
01765             }
01766           }
01767         } else {
01768           // Compute B = alpha*A'*B or B = alpha*conj(A')*B
01769           if( Upper ) {
01770             for( j=izero; j<n; j++ ) {
01771               for( i=m-ione; i>-ione; i-- ) {
01772                 temp = B[j*ldb + i];
01773                 if ( noConj ) {
01774                   if( noUnit )
01775                     temp *= A[i*lda + i];
01776                   for( k=izero; k<i; k++ ) {
01777                     temp += A[i*lda + k]*B[j*ldb + k];
01778                   }
01779                 } else {
01780                   if( noUnit )
01781                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01782                   for( k=izero; k<i; k++ ) {
01783                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01784                   }
01785                 }
01786                 B[j*ldb + i] = alpha*temp;
01787               }
01788             }
01789           } else {
01790             for( j=izero; j<n; j++ ) {
01791               for( i=izero; i<m; i++ ) {
01792                 temp = B[j*ldb + i];
01793                 if ( noConj ) {
01794                   if( noUnit )
01795                     temp *= A[i*lda + i];
01796                   for( k=i+ione; k<m; k++ ) {
01797                     temp += A[i*lda + k]*B[j*ldb + k];
01798                   }
01799                 } else {
01800                   if( noUnit )
01801                     temp *= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
01802                   for( k=i+ione; k<m; k++ ) {
01803                     temp += ScalarTraits<A_type>::conjugate(A[i*lda + k])*B[j*ldb + k];
01804                   }
01805                 }
01806                 B[j*ldb + i] = alpha*temp;
01807               }
01808             }
01809           }
01810         }
01811       } else {
01812         // A is on the right hand side of B.
01813 
01814         if( ETranspChar[transa] == 'N' ) {
01815           // Compute B = alpha*B*A
01816 
01817           if( Upper ) {
01818             // A is upper triangular.
01819             for( j=n-ione; j>-ione; j-- ) {
01820               temp = alpha;
01821               if( noUnit )
01822                 temp *= A[j*lda + j];
01823               for( i=izero; i<m; i++ ) {
01824                 B[j*ldb + i] *= temp;
01825               }
01826               for( k=izero; k<j; k++ ) {
01827                 if( A[j*lda + k] != A_zero ) {
01828                   temp = alpha*A[j*lda + k];
01829                   for( i=izero; i<m; i++ ) {
01830                     B[j*ldb + i] += temp*B[k*ldb + i];
01831                   }
01832                 }
01833               }
01834             }
01835           } else {
01836             // A is lower triangular.
01837             for( j=izero; j<n; j++ ) {
01838               temp = alpha;
01839               if( noUnit )
01840                 temp *= A[j*lda + j];
01841               for( i=izero; i<m; i++ ) {
01842                 B[j*ldb + i] *= temp;
01843               }
01844               for( k=j+ione; k<n; k++ ) {
01845                 if( A[j*lda + k] != A_zero ) {
01846                   temp = alpha*A[j*lda + k];
01847                   for( i=izero; i<m; i++ ) {
01848                     B[j*ldb + i] += temp*B[k*ldb + i];
01849                   }
01850                 }
01851               }
01852             }
01853           }
01854         } else {
01855           // Compute B = alpha*B*A' or B = alpha*B*conj(A')
01856 
01857           if( Upper ) {
01858             for( k=izero; k<n; k++ ) {
01859               for( j=izero; j<k; j++ ) {
01860                 if( A[k*lda + j] != A_zero ) {
01861                   if ( noConj )
01862                     temp = alpha*A[k*lda + j];
01863                   else
01864                     temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01865                   for( i=izero; i<m; i++ ) {
01866                     B[j*ldb + i] += temp*B[k*ldb + i];
01867                   }
01868                 }
01869               }
01870               temp = alpha;
01871               if( noUnit ) {
01872                 if ( noConj )
01873                   temp *= A[k*lda + k];
01874                 else
01875                   temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01876               }
01877               if( temp != one ) {
01878                 for( i=izero; i<m; i++ ) {
01879                   B[k*ldb + i] *= temp;
01880                 }
01881               }
01882             }
01883           } else {
01884             for( k=n-ione; k>-ione; k-- ) {
01885               for( j=k+ione; j<n; j++ ) {
01886                 if( A[k*lda + j] != A_zero ) {
01887                   if ( noConj )
01888                     temp = alpha*A[k*lda + j];
01889                   else
01890                     temp = alpha*ScalarTraits<A_type>::conjugate(A[k*lda + j]);
01891                   for( i=izero; i<m; i++ ) {
01892                     B[j*ldb + i] += temp*B[k*ldb + i];
01893                   }
01894                 }
01895               }
01896               temp = alpha;
01897               if( noUnit ) {
01898                 if ( noConj )
01899                   temp *= A[k*lda + k];
01900                 else
01901                   temp *= ScalarTraits<A_type>::conjugate(A[k*lda + k]);
01902               }
01903               if( temp != one ) {
01904                 for( i=izero; i<m; i++ ) {
01905                   B[k*ldb + i] *= temp;
01906                 }
01907               }
01908             }
01909           }
01910         } // end if( ETranspChar[transa] == 'N' ) ...
01911       } // end if ( LSide ) ...
01912     } // end if (!BadArgument)
01913   } // end TRMM
01914 
01915   template<typename OrdinalType, typename ScalarType>
01916   template <typename alpha_type, typename A_type>
01917   void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01918   {
01919     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01920     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01921     alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01922     A_type A_zero = ScalarTraits<A_type>::zero();
01923     ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01924     alpha_type alpha_one = ScalarTraits<alpha_type>::one();
01925     ScalarType B_one = ScalarTraits<ScalarType>::one();
01926     ScalarType temp;
01927     OrdinalType NRowA = m;
01928     bool BadArgument = false;
01929     bool noUnit = (EDiagChar[diag]=='N');
01930     bool noConj = (ETranspChar[transa] == 'T');
01931 
01932     if (!(ESideChar[side] == 'L')) { NRowA = n; }
01933 
01934     // Quick return.
01935     if (n == izero || m == izero) { return; }
01936     if( m < izero ) {
01937       std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01938       BadArgument = true; }
01939     if( n < izero ) {
01940       std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01941       BadArgument = true; }
01942     if( lda < NRowA ) {
01943       std::cout << "BLAS::TRSM Error: LDA < "<<NRowA<<std::endl;
01944       BadArgument = true; }
01945     if( ldb < m ) {
01946       std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)"<<std::endl;
01947       BadArgument = true; }
01948 
01949     if(!BadArgument)
01950       {
01951         int i, j, k;
01952         // Set the solution to the zero vector.
01953         if(alpha == alpha_zero) {
01954             for(j = izero; j < n; j++) {
01955                 for( i = izero; i < m; i++) {
01956                     B[j*ldb+i] = B_zero;
01957                 }
01958             }
01959         }
01960         else
01961         { // Start the operations.
01962             if(ESideChar[side] == 'L') {
01963                 //
01964                 // Perform computations for OP(A)*X = alpha*B
01965                 //
01966                 if(ETranspChar[transa] == 'N') {
01967                     //
01968                     //  Compute B = alpha*inv( A )*B
01969                     //
01970                     if(EUploChar[uplo] == 'U') {
01971                         // A is upper triangular.
01972                         for(j = izero; j < n; j++) {
01973                             // Perform alpha*B if alpha is not 1.
01974                           if(alpha != alpha_one) {
01975                                 for( i = izero; i < m; i++) {
01976                                     B[j*ldb+i] *= alpha;
01977                                 }
01978                             }
01979                             // Perform a backsolve for column j of B.
01980                             for(k = (m - ione); k > -ione; k--) {
01981                                 // If this entry is zero, we don't have to do anything.
01982                                 if (B[j*ldb + k] != B_zero) {
01983                                     if ( noUnit ) {
01984                                         B[j*ldb + k] /= A[k*lda + k];
01985                                     }
01986                                     for(i = izero; i < k; i++) {
01987                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01988                                     }
01989                                 }
01990                             }
01991                         }
01992                     }
01993                     else
01994                     { // A is lower triangular.
01995                         for(j = izero; j < n; j++) {
01996                             // Perform alpha*B if alpha is not 1.
01997                             if(alpha != alpha_one) {
01998                                 for( i = izero; i < m; i++) {
01999                                     B[j*ldb+i] *= alpha;
02000                                 }
02001                             }
02002                             // Perform a forward solve for column j of B.
02003                             for(k = izero; k < m; k++) {
02004                                 // If this entry is zero, we don't have to do anything.
02005                                 if (B[j*ldb + k] != B_zero) {
02006                                     if ( noUnit ) {
02007                                         B[j*ldb + k] /= A[k*lda + k];
02008                                     }
02009                                     for(i = k+ione; i < m; i++) {
02010                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
02011                                     }
02012                                 }
02013                             }
02014                         }
02015                     } // end if (uplo == 'U')
02016                 }  // if (transa =='N')
02017                 else {
02018                     //
02019                     //  Compute B = alpha*inv( A' )*B
02020                     //  or      B = alpha*inv( conj(A') )*B
02021                     //
02022                     if(EUploChar[uplo] == 'U') {
02023                         // A is upper triangular.
02024                         for(j = izero; j < n; j++) {
02025                             for( i = izero; i < m; i++) {
02026                                 temp = alpha*B[j*ldb+i];
02027                                 if ( noConj ) {
02028                                   for(k = izero; k < i; k++) {
02029                                       temp -= A[i*lda + k] * B[j*ldb + k];
02030                                   }
02031                                   if ( noUnit ) {
02032                                       temp /= A[i*lda + i];
02033                                   }
02034                                 } else {
02035                                   for(k = izero; k < i; k++) {
02036                                       temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
02037                                             * B[j*ldb + k];
02038                                   }
02039                                   if ( noUnit ) {
02040                                       temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
02041                                   }
02042                                 }
02043                                 B[j*ldb + i] = temp;
02044                             }
02045                         }
02046                     }
02047                     else
02048                     { // A is lower triangular.
02049                         for(j = izero; j < n; j++) {
02050                             for(i = (m - ione) ; i > -ione; i--) {
02051                                 temp = alpha*B[j*ldb+i];
02052                                 if ( noConj ) {
02053                                   for(k = i+ione; k < m; k++) {
02054                                     temp -= A[i*lda + k] * B[j*ldb + k];
02055                                   }
02056                                   if ( noUnit ) {
02057                                     temp /= A[i*lda + i];
02058                                   }
02059                                 } else {
02060                                   for(k = i+ione; k < m; k++) {
02061                                     temp -= ScalarTraits<A_type>::conjugate(A[i*lda + k])
02062                                           * B[j*ldb + k];
02063                                   }
02064                                   if ( noUnit ) {
02065                                     temp /= ScalarTraits<A_type>::conjugate(A[i*lda + i]);
02066                                   }
02067                                 }
02068                                 B[j*ldb + i] = temp;
02069                             }
02070                         }
02071                     }
02072                 }
02073             }  // if (side == 'L')
02074             else {
02075                // side == 'R'
02076                //
02077                // Perform computations for X*OP(A) = alpha*B
02078                //
02079               if (ETranspChar[transa] == 'N') {
02080                     //
02081                     //  Compute B = alpha*B*inv( A )
02082                     //
02083                     if(EUploChar[uplo] == 'U') {
02084                         // A is upper triangular.
02085                         // Perform a backsolve for column j of B.
02086                         for(j = izero; j < n; j++) {
02087                             // Perform alpha*B if alpha is not 1.
02088                             if(alpha != alpha_one) {
02089                                 for( i = izero; i < m; i++) {
02090                                     B[j*ldb+i] *= alpha;
02091                                 }
02092                             }
02093                             for(k = izero; k < j; k++) {
02094                                 // If this entry is zero, we don't have to do anything.
02095                                 if (A[j*lda + k] != A_zero) {
02096                                     for(i = izero; i < m; i++) {
02097                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
02098                                     }
02099                                 }
02100                             }
02101                             if ( noUnit ) {
02102                                 temp = B_one/A[j*lda + j];
02103                                 for(i = izero; i < m; i++) {
02104                                     B[j*ldb + i] *= temp;
02105                                 }
02106                             }
02107                         }
02108                     }
02109                     else
02110                     { // A is lower triangular.
02111                         for(j = (n - ione); j > -ione; j--) {
02112                             // Perform alpha*B if alpha is not 1.
02113                             if(alpha != alpha_one) {
02114                                 for( i = izero; i < m; i++) {
02115                                     B[j*ldb+i] *= alpha;
02116                                 }
02117                             }
02118                             // Perform a forward solve for column j of B.
02119                             for(k = j+ione; k < n; k++) {
02120                                 // If this entry is zero, we don't have to do anything.
02121                                 if (A[j*lda + k] != A_zero) {
02122                                     for(i = izero; i < m; i++) {
02123                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
02124                                     }
02125                                 }
02126                             }
02127                             if ( noUnit ) {
02128                                 temp = B_one/A[j*lda + j];
02129                                 for(i = izero; i < m; i++) {
02130                                     B[j*ldb + i] *= temp;
02131                                 }
02132                             }
02133                         }
02134                     } // end if (uplo == 'U')
02135                 }  // if (transa =='N')
02136                 else {
02137                     //
02138                     //  Compute B = alpha*B*inv( A' )
02139                     //  or      B = alpha*B*inv( conj(A') )
02140                     //
02141                     if(EUploChar[uplo] == 'U') {
02142                         // A is upper triangular.
02143                         for(k = (n - ione); k > -ione; k--) {
02144                             if ( noUnit ) {
02145                                 if ( noConj )
02146                                   temp = B_one/A[k*lda + k];
02147                                 else
02148                                   temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
02149                                 for(i = izero; i < m; i++) {
02150                                     B[k*ldb + i] *= temp;
02151                                 }
02152                             }
02153                             for(j = izero; j < k; j++) {
02154                                 if (A[k*lda + j] != A_zero) {
02155                                     if ( noConj )
02156                                       temp = A[k*lda + j];
02157                                     else
02158                                       temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
02159                                     for(i = izero; i < m; i++) {
02160                                         B[j*ldb + i] -= temp*B[k*ldb + i];
02161                                     }
02162                                 }
02163                             }
02164                             if (alpha != alpha_one) {
02165                                 for (i = izero; i < m; i++) {
02166                                     B[k*ldb + i] *= alpha;
02167                                 }
02168                             }
02169                         }
02170                     }
02171                     else
02172                     { // A is lower triangular.
02173                         for(k = izero; k < n; k++) {
02174                             if ( noUnit ) {
02175                                 if ( noConj )
02176                                   temp = B_one/A[k*lda + k];
02177                                 else
02178                                   temp = B_one/ScalarTraits<A_type>::conjugate(A[k*lda + k]);
02179                                 for (i = izero; i < m; i++) {
02180                                     B[k*ldb + i] *= temp;
02181                                 }
02182                             }
02183                             for(j = k+ione; j < n; j++) {
02184                                 if(A[k*lda + j] != A_zero) {
02185                                     if ( noConj )
02186                                       temp = A[k*lda + j];
02187                                     else
02188                                       temp = ScalarTraits<A_type>::conjugate(A[k*lda + j]);
02189                                     for(i = izero; i < m; i++) {
02190                                         B[j*ldb + i] -= temp*B[k*ldb + i];
02191                                     }
02192                                 }
02193                             }
02194                             if (alpha != alpha_one) {
02195                                 for (i = izero; i < m; i++) {
02196                                     B[k*ldb + i] *= alpha;
02197                                 }
02198                             }
02199                         }
02200                     }
02201                 }
02202             }
02203         }
02204     }
02205   }
02206 
02207   // Explicit instantiation for template<int,float>
02208 
02209   template <>
02210   class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, float>
02211   {
02212   public:
02213     inline BLAS(void) {}
02214     inline BLAS(const BLAS<int, float>& /*BLAS_source*/) {}
02215     inline virtual ~BLAS(void) {}
02216     void ROTG(float* da, float* db, float* c, float* s) const;
02217     void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
02218     float ASUM(const int n, const float* x, const int incx) const;
02219     void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
02220     void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
02221     float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
02222     float NRM2(const int n, const float* x, const int incx) const;
02223     void SCAL(const int n, const float alpha, float* x, const int incx) const;
02224     int IAMAX(const int n, const float* x, const int incx) const;
02225     void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
02226     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
02227     void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
02228     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
02229     void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
02230     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const;
02231     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02232     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
02233   };
02234 
02235   // Explicit instantiation for template<int,double>
02236 
02237   template<>
02238   class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, double>
02239   {
02240   public:
02241     inline BLAS(void) {}
02242     inline BLAS(const BLAS<int, double>& /*BLAS_source*/) {}
02243     inline virtual ~BLAS(void) {}
02244     void ROTG(double* da, double* db, double* c, double* s) const;
02245     void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
02246     double ASUM(const int n, const double* x, const int incx) const;
02247     void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
02248     void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
02249     double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
02250     double NRM2(const int n, const double* x, const int incx) const;
02251     void SCAL(const int n, const double alpha, double* x, const int incx) const;
02252     int IAMAX(const int n, const double* x, const int incx) const;
02253     void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
02254     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
02255     void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
02256     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
02257     void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
02258     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const;
02259     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02260     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
02261   };
02262 
02263   // Explicit instantiation for template<int,complex<float> >
02264 
02265   template<>
02266   class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
02267   {
02268   public:
02269     inline BLAS(void) {}
02270     inline BLAS(const BLAS<int, std::complex<float> >& /*BLAS_source*/) {}
02271     inline virtual ~BLAS(void) {}
02272     void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
02273     void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
02274     float ASUM(const int n, const std::complex<float>* x, const int incx) const;
02275     void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02276     void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
02277     std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
02278     float NRM2(const int n, const std::complex<float>* x, const int incx) const;
02279     void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
02280     int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
02281     void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
02282     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
02283     void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
02284     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02285     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
02286     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
02287     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02288     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
02289   };
02290 
02291   // Explicit instantiation for template<int,complex<double> >
02292 
02293   template<>
02294   class TEUCHOSNUMERICS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
02295   {
02296   public:
02297     inline BLAS(void) {}
02298     inline BLAS(const BLAS<int, std::complex<double> >& /*BLAS_source*/) {}
02299     inline virtual ~BLAS(void) {}
02300     void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
02301     void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
02302     double ASUM(const int n, const std::complex<double>* x, const int incx) const;
02303     void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02304     void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
02305     std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
02306     double NRM2(const int n, const std::complex<double>* x, const int incx) const;
02307     void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
02308     int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
02309     void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
02310     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
02311     void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
02312     void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02313     void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
02314     void SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
02315     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02316     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
02317   };
02318 
02319 } // namespace Teuchos
02320 
02321 #endif // _TEUCHOS_BLAS_HPP_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines