Teuchos_BLAS.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 // Kris
00030 // 06.16.03 -- Start over from scratch
00031 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
00032 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
00033 //          -- Added warning messages for generic calls
00034 // 07.08.03 -- Move into Teuchos package/namespace
00035 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
00036 //             * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
00037 //             * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
00038 //             * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
00039 //             * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
00040 //             * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
00041 //          -- Removed warning messages for generic calls
00042 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
00043 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
00044 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
00045 
00046 #ifndef _TEUCHOS_BLAS_HPP_
00047 #define _TEUCHOS_BLAS_HPP_
00048 
00056 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00057 the appropriate declaration of one will need to be added back into
00058 functions that include the macro:
00059 */
00060 #if defined (INTEL_CXML)
00061         unsigned int one=1;
00062 #endif
00063 
00064 #ifdef CHAR_MACRO
00065 #undef CHAR_MACRO
00066 #endif
00067 #if defined (INTEL_CXML)
00068 #define CHAR_MACRO(char_var) &char_var, one
00069 #else
00070 #define CHAR_MACRO(char_var) &char_var
00071 #endif
00072 
00073 #include "Teuchos_ConfigDefs.hpp"
00074 #include "Teuchos_BLAS_wrappers.hpp"
00075 #include "Teuchos_BLAS_types.hpp"
00076 #include "Teuchos_ScalarTraits.hpp"
00077 #include "Teuchos_OrdinalTraits.hpp"
00078 
00112 namespace Teuchos
00113 {
00114   extern const char ESideChar[];
00115   extern const char ETranspChar[];
00116   extern const char EUploChar[];
00117   extern const char EDiagChar[];
00118 
00119   template<typename OrdinalType, typename ScalarType>
00120   class BLAS
00121   {    
00122 
00123     typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00124     
00125   public:
00127 
00128     
00130     inline BLAS(void) {}
00131 
00133     inline BLAS(const BLAS<OrdinalType, ScalarType>& BLAS_source) {}
00134 
00136     inline virtual ~BLAS(void) {}
00138 
00140 
00141 
00143     void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00144 
00146     void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00147 
00149     void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00150 
00152     void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00153 
00155     void AXPY(const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00156 
00158     typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00159 
00161     ScalarType DOT(const OrdinalType n, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy) const;
00162 
00164     typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00165 
00167     OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00168 
00170 
00172 
00173 
00175     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, 
00176         const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const;
00177 
00179     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A, 
00180         const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00181 
00183     void GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, 
00184        const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00186     
00188 
00189 
00191     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
00192 
00194     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
00195 
00197     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00198                 const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00199 
00201     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00202                 const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00204   };
00205 
00206 //------------------------------------------------------------------------------------------
00207 //      LEVEL 1 BLAS ROUTINES  
00208 //------------------------------------------------------------------------------------------
00209     
00210   template<typename OrdinalType, typename ScalarType>
00211   void BLAS<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
00212   {
00213     ScalarType roe, scale, r;
00214     ScalarType zero = ScalarTraits<ScalarType>::zero();
00215     ScalarType one = ScalarTraits<ScalarType>::one();
00216 
00217     if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
00218     scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
00219     if ( scale == zero ) // There is nothing to do.
00220     {
00221       *c = one;
00222       *s = zero;
00223       *da = zero; *db = zero;
00224     } else { // Compute the Givens rotation.
00225       r = scale*ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) );
00226       if ( roe < zero ) { r *= -one; }
00227       *c = *da / r;
00228       *s = *db / r;
00229       *db = ScalarTraits<ScalarType>::one();
00230       if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
00231       if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
00232      *c != ScalarTraits<ScalarType>::zero() ) { *db = one / *c; }
00233       *da = r;
00234     }
00235   } /* end ROTG */
00236       
00237   template<typename OrdinalType, typename ScalarType>
00238   void BLAS<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00239   {
00240     // ToDo:  Implement this.
00241   }
00242 
00243   template<typename OrdinalType, typename ScalarType>
00244   void BLAS<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00245   {
00246     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00247     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00248     OrdinalType i, ix = izero;
00249     if ( n > izero ) {
00250         // Set the initial index (ix).
00251         if (incx < izero) { ix = (-n+ione)*incx; } 
00252         // Scale the std::vector.
00253         for(i = izero; i < n; i++)
00254         {
00255             x[ix] *= alpha;
00256             ix += incx;
00257         }
00258     }
00259   } /* end SCAL */
00260 
00261   template<typename OrdinalType, typename ScalarType>
00262   void BLAS<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00263   {
00264     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00265     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00266     OrdinalType i, ix = izero, iy = izero;
00267     if ( n > izero ) {
00268   // Set the initial indices (ix, iy).
00269       if (incx < izero) { ix = (-n+ione)*incx; }
00270       if (incy < izero) { iy = (-n+ione)*incy; }
00271 
00272         for(i = izero; i < n; i++)
00273           {
00274       y[iy] = x[ix];
00275       ix += incx;
00276       iy += incy;
00277           }
00278       }
00279   } /* end COPY */
00280 
00281   template<typename OrdinalType, typename ScalarType>
00282   void BLAS<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00283   {
00284     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00285     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00286     OrdinalType i, ix = izero, iy = izero;
00287     if( n > izero && alpha != ScalarTraits<ScalarType>::zero())
00288       {
00289   // Set the initial indices (ix, iy).
00290       if (incx < izero) { ix = (-n+ione)*incx; }
00291       if (incy < izero) { iy = (-n+ione)*incy; }
00292 
00293         for(i = izero; i < n; i++)
00294           {
00295       y[iy] += alpha * x[ix];
00296       ix += incx;
00297       iy += incy;
00298           }
00299       }
00300   } /* end AXPY */
00301 
00302   template<typename OrdinalType, typename ScalarType>
00303   typename ScalarTraits<ScalarType>::magnitudeType BLAS<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00304   {
00305     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00306     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00307     typename ScalarTraits<ScalarType>::magnitudeType result = 
00308       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00309     OrdinalType i, ix = izero;
00310     if( n > izero ) {
00311   // Set the initial indices
00312   if (incx < izero) { ix = (-n+ione)*incx; }
00313 
00314       for(i = izero; i < n; i++)
00315           {
00316       result += ScalarTraits<ScalarType>::magnitude(x[ix]);
00317       ix += incx;
00318           }
00319     } 
00320    return result;
00321   } /* end ASUM */
00322   
00323   template<typename OrdinalType, typename ScalarType>
00324   ScalarType BLAS<OrdinalType, ScalarType>::DOT(const OrdinalType n, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy) const
00325   {
00326     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00327     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00328     ScalarType result = ScalarTraits<ScalarType>::zero();
00329     OrdinalType i, ix = izero, iy = izero;
00330     if( n > izero ) 
00331       {
00332   // Set the initial indices (ix,iy).       
00333   if (incx < izero) { ix = (-n+ione)*incx; }
00334   if (incy < izero) { iy = (-n+ione)*incy; }
00335 
00336   for(i = izero; i < n; i++)
00337     {
00338       result += ScalarTraits<ScalarType>::conjugate(x[ix]) * y[iy];
00339       ix += incx;
00340       iy += incy;
00341     }
00342       }
00343     return result;
00344   } /* end DOT */
00345   
00346   template<typename OrdinalType, typename ScalarType>
00347   typename ScalarTraits<ScalarType>::magnitudeType BLAS<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00348   {
00349     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00350     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00351     typename ScalarTraits<ScalarType>::magnitudeType result = 
00352       ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00353     OrdinalType i, ix = izero;
00354     if ( n > izero ) 
00355       {
00356   // Set the initial index.
00357   if (incx < izero) { ix = (-n+ione)*incx; }  
00358     
00359   for(i = izero; i < n; i++)
00360           {
00361       result += ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix];
00362       ix += incx;
00363           }
00364       result = ScalarTraits<ScalarType>::squareroot(result);
00365       } 
00366     return result;
00367   } /* end NRM2 */
00368   
00369   template<typename OrdinalType, typename ScalarType>
00370   OrdinalType BLAS<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00371   {
00372     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00373     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00374     OrdinalType result = izero, ix = izero, i;
00375     ScalarType maxval;
00376 
00377     if ( n > izero ) 
00378       {
00379   if (incx < izero) { ix = (-n+ione)*incx; }
00380   maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00381   ix += incx;
00382       for(i = ione; i < n; i++)
00383           {
00384       if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval)
00385         {
00386         result = i;
00387           maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00388         }
00389       ix += incx;
00390     }
00391       }
00392     return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
00393   } /* end IAMAX */
00394 
00395 //------------------------------------------------------------------------------------------
00396 //      LEVEL 2 BLAS ROUTINES
00397 //------------------------------------------------------------------------------------------
00398 
00399   template<typename OrdinalType, typename ScalarType>
00400   void BLAS<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const
00401   {
00402     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00403     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00404     ScalarType zero = ScalarTraits<ScalarType>::zero();
00405     ScalarType one = ScalarTraits<ScalarType>::one();
00406     bool BadArgument = false;
00407 
00408     // Quick return if there is nothing to do!
00409     if( m == izero || n == izero || ( alpha == zero && beta == one ) ){ return; }
00410     
00411     // Otherwise, we need to check the argument list.
00412     if( m < izero ) { 
00413   std::cout << "BLAS::GEMV Error: M == " << m << std::endl;     
00414   BadArgument = true;
00415     }
00416     if( n < izero ) { 
00417   std::cout << "BLAS::GEMV Error: N == " << n << std::endl;     
00418   BadArgument = true;
00419     }
00420     if( lda < m ) { 
00421   std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;      
00422   BadArgument = true;
00423     }
00424     if( incx == izero ) {
00425   std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00426   BadArgument = true;
00427     }
00428     if( incy == izero ) {
00429   std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00430   BadArgument = true;
00431     }
00432 
00433     if(!BadArgument) {
00434       OrdinalType i, j, lenx, leny, ix, iy, jx, jy; 
00435       OrdinalType kx = izero, ky = izero;
00436       ScalarType temp;
00437 
00438       // Determine the lengths of the vectors x and y.
00439       if(ETranspChar[trans] == 'N') {
00440   lenx = n;
00441   leny = m;
00442       } else {
00443   lenx = m;
00444   leny = n;
00445       }
00446 
00447       // Set the starting pointers for the vectors x and y if incx/y < 0.
00448       if (incx < izero ) { kx =  (ione - lenx)*incx; }
00449       if (incy < izero ) { ky =  (ione - leny)*incy; }
00450 
00451       // Form y = beta*y
00452       ix = kx; iy = ky;
00453       if(beta != one) {
00454   if (incy == ione) {
00455     if (beta == zero) {
00456       for(i = izero; i < leny; i++) { y[i] = zero; }
00457     } else {
00458       for(i = izero; i < leny; i++) { y[i] *= beta; }
00459     }
00460   } else {
00461     if (beta == zero) {
00462       for(i = izero; i < leny; i++) {
00463         y[iy] = zero;
00464         iy += incy;
00465       }
00466     } else {
00467       for(i = izero; i < leny; i++) {
00468         y[iy] *= beta;
00469         iy += incy;
00470       }
00471     }
00472   }
00473       }
00474   
00475       // Return if we don't have to do anything more.
00476       if(alpha == zero) { return; }
00477 
00478       if( ETranspChar[trans] == 'N' ) {
00479   // Form y = alpha*A*y
00480   jx = kx;
00481   if (incy == ione) {
00482     for(j = izero; j < n; j++) {
00483       if (x[jx] != zero) {
00484         temp = alpha*x[jx];
00485         for(i = izero; i < m; i++) {
00486     y[i] += temp*A[j*lda + i];
00487         }
00488       }
00489       jx += incx;
00490     }
00491   } else {
00492     for(j = izero; j < n; j++) {
00493       if (x[jx] != zero) {
00494         temp = alpha*x[jx];
00495         iy = ky;
00496         for(i = izero; i < m; i++) {
00497     y[iy] += temp*A[j*lda + i];
00498     iy += incy;
00499         }
00500       }
00501       jx += incx;
00502     }
00503   }
00504       } else {
00505   jy = ky;
00506   if (incx == ione) {
00507     for(j = izero; j < n; j++) {
00508       temp = zero;
00509       for(i = izero; i < m; i++) {
00510         temp += A[j*lda + i]*x[i];
00511       }
00512       y[jy] += alpha*temp;
00513       jy += incy;
00514     }
00515   } else {
00516     for(j = izero; j < n; j++) {
00517       temp = zero;
00518       ix = kx;
00519       for (i = izero; i < m; i++) {
00520         temp += A[j*lda + i]*x[ix];
00521         ix += incx;
00522       }
00523       y[jy] += alpha*temp;
00524       jy += incy;
00525     }
00526   }
00527       }
00528     } /* if (!BadArgument) */
00529   } /* end GEMV */
00530 
00531  template<typename OrdinalType, typename ScalarType>
00532  void BLAS<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00533   {
00534     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00535     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00536     ScalarType zero = ScalarTraits<ScalarType>::zero();
00537     ScalarType one = ScalarTraits<ScalarType>::one();
00538     bool BadArgument = false;
00539 
00540     // Quick return if there is nothing to do!
00541     if( n == izero ){ return; }
00542     
00543     // Otherwise, we need to check the argument list.
00544     if( n < izero ) { 
00545       std::cout << "BLAS::TRMV Error: N == " << n << std::endl;     
00546       BadArgument = true;
00547     }
00548     if( lda < n ) { 
00549       std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;      
00550       BadArgument = true;
00551     }
00552     if( incx == izero ) {
00553       std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00554       BadArgument = true;
00555     }
00556 
00557     if(!BadArgument) {
00558       OrdinalType i, j, ix, jx, kx = izero;
00559       ScalarType temp;
00560       bool NoUnit = (EDiagChar[diag] == 'N');
00561 
00562       // Set the starting pointer for the std::vector x if incx < 0.
00563       if (incx < izero) { kx = (-n+ione)*incx; }
00564 
00565       // Start the operations for a nontransposed triangular matrix 
00566       if (ETranspChar[trans] == 'N') {
00567   /* Compute x = A*x */
00568   if (EUploChar[uplo] == 'U') {
00569     /* A is an upper triangular matrix */
00570     if (incx == ione) {
00571       for (j=izero; j<n; j++) {
00572         if (x[j] != zero) {
00573     temp = x[j];
00574     for (i=izero; i<j; i++) {
00575       x[i] += temp*A[j*lda + i];
00576     }
00577     if (NoUnit) 
00578       x[j] *= A[j*lda + j];
00579         }
00580       }
00581     } else {
00582       jx = kx;
00583       for (j=izero; j<n; j++) {
00584         if (x[jx] != zero) {
00585     temp = x[jx];
00586     ix = kx;
00587     for (i=izero; i<j; i++) {
00588       x[ix] += temp*A[j*lda + i];
00589       ix += incx;
00590     }
00591     if (NoUnit)
00592       x[jx] *= A[j*lda + j];
00593         }
00594         jx += incx;
00595       }
00596     } /* if (incx == ione) */
00597   } else { /* A is a lower triangular matrix */
00598     if (incx == ione) {
00599       for (j=n-ione; j>-ione; j--) {
00600         if (x[j] != zero) {
00601     temp = x[j];
00602     for (i=n-ione; i>j; i--) {
00603       x[i] += temp*A[j*lda + i];
00604     }
00605     if (NoUnit)
00606       x[j] *= A[j*lda + j];
00607         }
00608       }
00609     } else {
00610       kx += (n-ione)*incx;
00611       jx = kx;
00612       for (j=n-ione; j>-ione; j--) {
00613         if (x[jx] != zero) {
00614     temp = x[jx];
00615     ix = kx;
00616     for (i=n-ione; i>j; i--) {
00617       x[ix] += temp*A[j*lda + i];
00618       ix -= incx;
00619     }
00620     if (NoUnit) 
00621       x[jx] *= A[j*lda + j];
00622         }
00623         jx -= incx;
00624       }
00625     }
00626   } /* if (EUploChar[uplo]=='U') */
00627       } else { /* A is transposed/conjugated */
00628   /* Compute x = A'*x */
00629   if (EUploChar[uplo]=='U') {
00630     /* A is an upper triangular matrix */
00631     if (incx == ione) {
00632       for (j=n-ione; j>-ione; j--) {
00633         temp = x[j];
00634         if (NoUnit)
00635     temp *= A[j*lda + j];
00636         for (i=j-ione; i>-ione; i--) {
00637     temp += A[j*lda + i]*x[i];
00638         }
00639         x[j] = temp;
00640       }
00641     } else {
00642       jx = kx + (n-ione)*incx;
00643       for (j=n-ione; j>-ione; j--) {
00644         temp = x[jx];
00645         ix = jx;
00646         if (NoUnit)
00647     temp *= A[j*lda + j];
00648         for (i=j-ione; i>-ione; i--) {
00649     ix -= incx;
00650     temp += A[j*lda + i]*x[ix];
00651         }
00652         x[jx] = temp;
00653         jx -= incx;
00654       }
00655     }
00656   } else {
00657     /* A is a lower triangular matrix */
00658     if (incx == ione) {
00659       for (j=izero; j<n; j++) {
00660         temp = x[j];
00661         if (NoUnit)
00662     temp *= A[j*lda + j];
00663         for (i=j+ione; i<n; i++) {
00664     temp += A[j*lda + i]*x[i];
00665         }
00666         x[j] = temp;
00667       }
00668     } else {
00669       jx = kx;
00670       for (j=izero; j<n; j++) {
00671         temp = x[jx];
00672         ix = jx;
00673         if (NoUnit) 
00674     temp *= A[j*lda + j];
00675         for (i=j+ione; i<n; i++) {
00676     ix += incx;
00677     temp += A[j*lda + i]*x[ix];
00678         }
00679         x[jx] = temp;
00680         jx += incx;       
00681       }
00682     }
00683   } /* if (EUploChar[uplo]=='U') */
00684       } /* if (ETranspChar[trans]=='N') */
00685     } /* if (!BadArgument) */
00686   } /* end TRMV */
00687         
00688   template<typename OrdinalType, typename ScalarType>
00689   void BLAS<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
00690   {
00691     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00692     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00693     ScalarType zero = ScalarTraits<ScalarType>::zero();
00694     ScalarType one = ScalarTraits<ScalarType>::one();
00695     bool BadArgument = false;
00696 
00697     // Quick return if there is nothing to do!
00698     if( m == izero || n == izero || alpha == zero ){ return; }
00699     
00700     // Otherwise, we need to check the argument list.
00701     if( m < izero ) { 
00702   std::cout << "BLAS::GER Error: M == " << m << std::endl;      
00703   BadArgument = true;
00704     }
00705     if( n < izero ) { 
00706   std::cout << "BLAS::GER Error: N == " << n << std::endl;      
00707   BadArgument = true;
00708     }
00709     if( lda < m ) { 
00710   std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;     
00711   BadArgument = true;
00712     }
00713     if( incx == 0 ) {
00714   std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
00715   BadArgument = true;
00716     }
00717     if( incy == 0 ) {
00718   std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
00719   BadArgument = true;
00720     }
00721 
00722     if(!BadArgument) {
00723       OrdinalType i, j, ix, jy = izero, kx = izero;
00724       ScalarType temp;
00725 
00726       // Set the starting pointers for the vectors x and y if incx/y < 0.
00727       if (incx < izero) { kx = (-m+ione)*incx; }
00728       if (incy < izero) { jy = (-n+ione)*incy; }
00729 
00730       // Start the operations for incx == 1
00731       if( incx == ione ) {
00732   for( j=izero; j<n; j++ ) {
00733     if ( y[jy] != zero ) {
00734       temp = alpha*y[jy];
00735       for ( i=izero; i<m; i++ ) {
00736         A[j*lda + i] += x[i]*temp;
00737       }
00738     }
00739     jy += incy;
00740   }
00741       } 
00742       else { // Start the operations for incx != 1
00743   for( j=izero; j<n; j++ ) {
00744     if ( y[jy] != zero ) {
00745       temp = alpha*y[jy];
00746       ix = kx;
00747       for( i=izero; i<m; i++ ) {
00748         A[j*lda + i] += x[ix]*temp;
00749         ix += incx;
00750       }
00751     }
00752     jy += incy;
00753   }
00754       }
00755     } /* if(!BadArgument) */
00756   } /* end GER */
00757   
00758 //------------------------------------------------------------------------------------------
00759 //      LEVEL 3 BLAS ROUTINES
00760 //------------------------------------------------------------------------------------------
00761         
00762   template<typename OrdinalType, typename ScalarType>
00763   void BLAS<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
00764   {
00765     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00766     ScalarType zero = ScalarTraits<ScalarType>::zero();
00767     ScalarType one = ScalarTraits<ScalarType>::one();
00768     OrdinalType i, j, p;
00769     OrdinalType NRowA = m, NRowB = k;
00770     ScalarType temp;
00771     bool BadArgument = false;
00772 
00773     // Change dimensions of matrix if either matrix is transposed
00774     if( !(ETranspChar[transa]=='N') ) {
00775       NRowA = k;
00776     }
00777     if( !(ETranspChar[transb]=='N') ) {
00778       NRowB = n;
00779     }
00780 
00781     // Quick return if there is nothing to do!
00782     if( (m==izero) || (n==izero) || (((alpha==zero)||(k==izero)) && (beta==one)) ){ return; }
00783     if( m < izero ) { 
00784       std::cout << "BLAS::GEMM Error: M == " << m << std::endl;     
00785       BadArgument = true;
00786     }
00787     if( n < izero ) { 
00788       std::cout << "BLAS::GEMM Error: N == " << n << std::endl;     
00789       BadArgument = true;
00790     }
00791     if( k < izero ) { 
00792       std::cout << "BLAS::GEMM Error: K == " << k << std::endl;     
00793       BadArgument = true;
00794     }
00795     if( lda < NRowA ) { 
00796       std::cout << "BLAS::GEMM Error: LDA < MAX(1,M)"<< std::endl;      
00797       BadArgument = true;
00798     }
00799     if( ldb < NRowB ) { 
00800       std::cout << "BLAS::GEMM Error: LDB < MAX(1,K)"<< std::endl;      
00801       BadArgument = true;
00802     }
00803      if( ldc < m ) { 
00804       std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;      
00805       BadArgument = true;
00806     }
00807 
00808     if(!BadArgument) {
00809 
00810       // Only need to scale the resulting matrix C.
00811       if( alpha == zero ) {
00812   if( beta == zero ) {
00813     for (j=izero; j<n; j++) {
00814       for (i=izero; i<m; i++) {
00815         C[j*ldc + i] = zero;
00816       }
00817     }
00818   } else {
00819     for (j=izero; j<n; j++) {
00820       for (i=izero; i<m; i++) {
00821         C[j*ldc + i] *= beta;
00822       }
00823     }
00824   }
00825   return;
00826       }
00827       //
00828       // Now start the operations.
00829       //
00830       if ( ETranspChar[transb]=='N' ) {
00831   if ( ETranspChar[transa]=='N' ) {
00832     // Compute C = alpha*A*B + beta*C
00833     for (j=izero; j<n; j++) {
00834       if( beta == zero ) {
00835         for (i=izero; i<m; i++){
00836     C[j*ldc + i] = zero;
00837         }
00838       } else if( beta != one ) {
00839         for (i=izero; i<m; i++){
00840     C[j*ldc + i] *= beta;
00841         }
00842       }
00843       for (p=izero; p<k; p++){
00844         if (B[j*ldb + p] != zero ){
00845     temp = alpha*B[j*ldb + p];
00846     for (i=izero; i<m; i++) {
00847       C[j*ldc + i] += temp*A[p*lda + i];
00848     }
00849         }
00850       }
00851     }
00852   } else {
00853     // Compute C = alpha*A'*B + beta*C
00854     for (j=izero; j<n; j++) {
00855       for (i=izero; i<m; i++) {
00856         temp = zero;
00857         for (p=izero; p<k; p++) {
00858     temp += A[i*lda + p]*B[j*ldb + p];
00859         }
00860         if (beta == zero) {
00861     C[j*ldc + i] = alpha*temp;
00862         } else {
00863     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00864         }
00865       }
00866     }
00867   }
00868       } else {
00869   if ( ETranspChar[transa]=='N' ) {
00870     // Compute C = alpha*A*B' + beta*C
00871     for (j=izero; j<n; j++) {
00872       if (beta == zero) {
00873         for (i=izero; i<m; i++) {
00874     C[j*ldc + i] = zero;
00875         } 
00876       } else if ( beta != one ) {
00877         for (i=izero; i<m; i++) {
00878     C[j*ldc + i] *= beta;
00879         }
00880       }
00881       for (p=izero; p<k; p++) {
00882         if (B[p*ldb + j] != zero) {
00883     temp = alpha*B[p*ldb + j];
00884     for (i=izero; i<m; i++) {
00885       C[j*ldc + i] += temp*A[p*lda + i];
00886     }
00887         }
00888       }
00889     }
00890   } else {
00891     // Compute C += alpha*A'*B' + beta*C
00892     for (j=izero; j<n; j++) {
00893       for (i=izero; i<m; i++) {
00894         temp = zero;
00895         for (p=izero; p<k; p++) {
00896     temp += A[i*lda + p]*B[p*ldb + j];
00897         }
00898         if (beta == zero) {
00899     C[j*ldc + i] = alpha*temp;
00900         } else {
00901     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00902         }
00903       }
00904     }
00905   } // end if (ETranspChar[transa]=='N') ...
00906       } // end if (ETranspChar[transb]=='N') ...
00907     } // end if (!BadArgument) ...
00908   } // end of GEMM
00909 
00910 
00911   template<typename OrdinalType, typename ScalarType>
00912   void BLAS<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
00913   {
00914     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00915     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00916     ScalarType zero = ScalarTraits<ScalarType>::zero();
00917     ScalarType one = ScalarTraits<ScalarType>::one();
00918     OrdinalType i, j, k, NRowA = m;
00919     ScalarType temp1, temp2;
00920     bool BadArgument = false;
00921     bool Upper = (EUploChar[uplo] == 'U');
00922     if (ESideChar[side] == 'R') { NRowA = n; }
00923     
00924     // Quick return.
00925     if ( (m==izero) || (n==izero) || ( (alpha==zero)&&(beta==one) ) ) { return; }
00926     if( m < 0 ) { 
00927       std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
00928       BadArgument = true; }
00929     if( n < 0 ) {
00930       std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
00931       BadArgument = true; }
00932     if( lda < NRowA ) {
00933       std::cout << "BLAS::SYMM Error: LDA == "<<lda<<std::endl;
00934       BadArgument = true; }
00935     if( ldb < m ) {
00936       std::cout << "BLAS::SYMM Error: LDB == "<<ldb<<std::endl;
00937       BadArgument = true; }
00938     if( ldc < m ) {
00939       std::cout << "BLAS::SYMM Error: LDC == "<<ldc<<std::endl;
00940       BadArgument = true; }
00941 
00942     if(!BadArgument) {
00943 
00944       // Only need to scale C and return.
00945       if (alpha == zero) {
00946   if (beta == zero ) {
00947     for (j=izero; j<n; j++) {
00948       for (i=izero; i<m; i++) {
00949         C[j*ldc + i] = zero;
00950       }
00951     }
00952   } else {
00953     for (j=izero; j<n; j++) {
00954       for (i=izero; i<m; i++) {
00955         C[j*ldc + i] *= beta;
00956       }
00957     }
00958   }
00959   return;
00960       }
00961 
00962       if ( ESideChar[side] == 'L') {
00963   // Compute C = alpha*A*B + beta*C
00964 
00965   if (Upper) {
00966     // The symmetric part of A is stored in the upper triangular part of the matrix.
00967     for (j=izero; j<n; j++) {
00968       for (i=izero; i<m; i++) {
00969         temp1 = alpha*B[j*ldb + i];
00970         temp2 = zero;
00971         for (k=izero; k<i; k++) {
00972     C[j*ldc + k] += temp1*A[i*lda + k];
00973     temp2 += B[j*ldb + k]*A[i*lda + k];
00974         }
00975         if (beta == zero) {
00976     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
00977         } else {
00978     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
00979         }
00980       }
00981     }
00982   } else {
00983     // The symmetric part of A is stored in the lower triangular part of the matrix.
00984     for (j=izero; j<n; j++) {
00985       for (i=m-ione; i>-ione; i--) {
00986         temp1 = alpha*B[j*ldb + i];
00987         temp2 = zero;
00988         for (k=i+ione; k<m; k++) {
00989     C[j*ldc + k] += temp1*A[i*lda + k];
00990     temp2 += B[j*ldb + k]*A[i*lda + k];
00991         }
00992         if (beta == zero) {
00993     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
00994         } else {
00995     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
00996         }
00997       }
00998     }
00999   }
01000       } else {
01001   // Compute C = alpha*B*A + beta*C.
01002   for (j=izero; j<n; j++) {
01003     temp1 = alpha*A[j*lda + j];
01004     if (beta == zero) {
01005       for (i=izero; i<m; i++) {
01006         C[j*ldc + i] = temp1*B[j*ldb + i];
01007       }
01008     } else {
01009       for (i=izero; i<m; i++) {
01010         C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01011       }
01012     }
01013     for (k=izero; k<j; k++) {
01014       if (Upper) {
01015         temp1 = alpha*A[j*lda + k];
01016       } else {
01017         temp1 = alpha*A[k*lda + j];
01018       }
01019       for (i=izero; i<m; i++) {
01020         C[j*ldc + i] += temp1*B[k*ldb + i];
01021       }
01022     }
01023     for (k=j+ione; k<n; k++) {
01024       if (Upper) {
01025         temp1 = alpha*A[k*lda + j];
01026       } else {
01027         temp1 = alpha*A[j*lda + k];
01028       }
01029       for (i=izero; i<m; i++) {
01030         C[j*ldc + i] += temp1*B[k*ldb + i];
01031       }
01032     }
01033   }
01034       } // end if (ESideChar[side]=='L') ...
01035     } // end if(!BadArgument) ...
01036 } // end SYMM
01037   
01038   template<typename OrdinalType, typename ScalarType>
01039   void BLAS<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01040   {
01041     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01042     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01043     ScalarType zero = ScalarTraits<ScalarType>::zero();
01044     ScalarType one = ScalarTraits<ScalarType>::one();
01045     OrdinalType i, j, k, NRowA = m;
01046     ScalarType temp;
01047     bool BadArgument = false;
01048     bool LSide = (ESideChar[side] == 'L');
01049     bool NoUnit = (EDiagChar[diag] == 'N');
01050     bool Upper = (EUploChar[uplo] == 'U');
01051 
01052     if(!LSide) { NRowA = n; }
01053 
01054     // Quick return.
01055     if (n==izero || m==izero) { return; }
01056     if( m < 0 ) {
01057       std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01058       BadArgument = true; }
01059     if( n < 0 ) {
01060       std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01061       BadArgument = true; }
01062     if( lda < NRowA ) {
01063       std::cout << "BLAS::TRMM Error: LDA == "<< lda << std::endl;
01064       BadArgument = true; }
01065     if( ldb < m ) {
01066       std::cout << "BLAS::TRMM Error: M == "<< ldb << std::endl;
01067       BadArgument = true; }
01068 
01069     if(!BadArgument) {
01070 
01071       // B only needs to be zeroed out.
01072       if( alpha == zero ) {
01073   for( j=izero; j<n; j++ ) {
01074     for( i=izero; i<m; i++ ) {
01075       B[j*ldb + i] = zero;
01076     }
01077   }
01078   return;
01079       }
01080       
01081       //  Start the computations. 
01082       if ( LSide ) {
01083   // A is on the left side of B.
01084   
01085   if ( ETranspChar[transa]=='N' ) {
01086     // Compute B = alpha*A*B
01087 
01088     if ( Upper ) {
01089       // A is upper triangular
01090       for( j=izero; j<n; j++ ) {
01091         for( k=izero; k<m; k++) {
01092     if ( B[j*ldb + k] != zero ) {
01093       temp = alpha*B[j*ldb + k];
01094       for( i=izero; i<k; i++ ) {
01095         B[j*ldb + i] += temp*A[k*lda + i];
01096       }
01097       if ( NoUnit )
01098         temp *=A[k*lda + k];
01099       B[j*ldb + k] = temp;
01100     }
01101         }
01102       }
01103     } else {
01104       // A is lower triangular
01105       for( j=izero; j<n; j++ ) {
01106         for( k=m-ione; k>-ione; k-- ) {
01107     if( B[j*ldb + k] != zero ) {
01108       temp = alpha*B[j*ldb + k];
01109       B[j*ldb + k] = temp;
01110       if ( NoUnit )
01111         B[j*ldb + k] *= A[k*lda + k];
01112       for( i=k+ione; i<m; i++ ) {
01113         B[j*ldb + i] += temp*A[k*lda + i];
01114       }
01115     }
01116         }
01117       }
01118     }
01119   } else {
01120     // Compute B = alpha*A'*B
01121     if( Upper ) {
01122       for( j=izero; j<n; j++ ) {
01123         for( i=m-ione; i>-ione; i-- ) {
01124     temp = B[j*ldb + i];
01125     if( NoUnit )
01126       temp *= A[i*lda + i];
01127     for( k=izero; k<i; k++ ) {
01128       temp += A[i*lda + k]*B[j*ldb + k];
01129     }
01130     B[j*ldb + i] = alpha*temp;
01131         }
01132       }
01133     } else {
01134       for( j=izero; j<n; j++ ) {
01135         for( i=izero; i<m; i++ ) {
01136     temp = B[j*ldb + i];
01137     if( NoUnit ) 
01138       temp *= A[i*lda + i];
01139     for( k=i+ione; k<m; k++ ) {
01140       temp += A[i*lda + k]*B[j*ldb + k];
01141     }
01142     B[j*ldb + i] = alpha*temp;
01143         }
01144       }
01145     }
01146   }
01147       } else {
01148   // A is on the right hand side of B.
01149   
01150   if( ETranspChar[transa] == 'N' ) {
01151     // Compute B = alpha*B*A
01152 
01153     if( Upper ) {
01154       // A is upper triangular.
01155       for( j=n-ione; j>-ione; j-- ) {
01156         temp = alpha;
01157         if( NoUnit )
01158     temp *= A[j*lda + j];
01159         for( i=izero; i<m; i++ ) {
01160     B[j*ldb + i] *= temp;
01161         }
01162         for( k=izero; k<j; k++ ) {
01163     if( A[j*lda + k] != zero ) {
01164       temp = alpha*A[j*lda + k];
01165       for( i=izero; i<m; i++ ) {
01166         B[j*ldb + i] += temp*B[k*ldb + i];
01167       }
01168     }
01169         }
01170       }
01171     } else {
01172       // A is lower triangular.
01173       for( j=izero; j<n; j++ ) {
01174         temp = alpha;
01175         if( NoUnit )
01176     temp *= A[j*lda + j];
01177         for( i=izero; i<m; i++ ) {
01178     B[j*ldb + i] *= temp;
01179         }
01180         for( k=j+ione; k<n; k++ ) {
01181     if( A[j*lda + k] != zero ) {
01182       temp = alpha*A[j*lda + k];
01183       for( i=izero; i<m; i++ ) {
01184         B[j*ldb + i] += temp*B[k*ldb + i];
01185       }
01186     }
01187         }
01188       }
01189     }
01190   } else {
01191     // Compute B = alpha*B*A'
01192 
01193     if( Upper ) {
01194       for( k=izero; k<n; k++ ) {
01195         for( j=izero; j<k; j++ ) {
01196     if( A[k*lda + j] != zero ) {
01197       temp = alpha*A[k*lda + j];
01198       for( i=izero; i<m; i++ ) {
01199         B[j*ldb + i] += temp*B[k*ldb + i];
01200       }
01201     }
01202         }
01203         temp = alpha;
01204         if( NoUnit ) 
01205     temp *= A[k*lda + k];
01206         if( temp != one ) {
01207     for( i=izero; i<m; i++ ) {
01208       B[k*ldb + i] *= temp;
01209     }
01210         }
01211       }
01212     } else {
01213       for( k=n-ione; k>-ione; k-- ) {
01214         for( j=k+ione; j<n; j++ ) {
01215     if( A[k*lda + j] != zero ) {
01216       temp = alpha*A[k*lda + j];
01217       for( i=izero; i<m; i++ ) {
01218         B[j*ldb + i] += temp*B[k*ldb + i];
01219       }
01220     }
01221         }
01222         temp = alpha;
01223         if( NoUnit )
01224     temp *= A[k*lda + k];
01225         if( temp != one ) {
01226     for( i=izero; i<m; i++ ) {
01227       B[k*ldb + i] *= temp;
01228     }
01229         }
01230       }
01231     }
01232   } // end if( ETranspChar[transa] == 'N' ) ...
01233       } // end if ( LSide ) ...
01234     } // end if (!BadArgument)
01235   } // end TRMM
01236   
01237   template<typename OrdinalType, typename ScalarType>
01238   void BLAS<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01239   {
01240     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01241     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01242     ScalarType zero = ScalarTraits<ScalarType>::zero();
01243     ScalarType one = ScalarTraits<ScalarType>::one();
01244     ScalarType temp;
01245     OrdinalType NRowA = m;
01246     bool BadArgument = false;
01247     bool NoUnit = (EDiagChar[diag]=='N');
01248     
01249     if (!(ESideChar[side] == 'L')) { NRowA = n; }
01250 
01251     // Quick return.
01252     if (n == izero || m == izero) { return; }
01253     if( m < izero ) {
01254       std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01255       BadArgument = true; }
01256     if( n < izero ) {
01257       std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01258       BadArgument = true; }
01259     if( lda < NRowA ) {
01260       std::cout << "BLAS::TRSM Error: LDA == "<<lda<<std::endl;
01261       BadArgument = true; }
01262     if( ldb < m ) {
01263       std::cout << "BLAS::TRSM Error: LDB == "<<ldb<<std::endl;
01264       BadArgument = true; }
01265 
01266     if(!BadArgument)
01267       {
01268   int i, j, k;
01269   // Set the solution to the zero std::vector.
01270   if(alpha == zero) {
01271       for(j = izero; j < n; j++) {
01272         for( i = izero; i < m; i++) {
01273         B[j*ldb+i] = zero;
01274           }
01275       }
01276   }
01277   else 
01278   { // Start the operations.
01279       if(ESideChar[side] == 'L') {
01280     //
01281         // Perform computations for OP(A)*X = alpha*B     
01282     //
01283     if(ETranspChar[transa] == 'N') {
01284         //
01285         //  Compute B = alpha*inv( A )*B
01286         //
01287         if(EUploChar[uplo] == 'U') { 
01288       // A is upper triangular.
01289       for(j = izero; j < n; j++) {
01290               // Perform alpha*B if alpha is not 1.
01291               if(alpha != one) {
01292                 for( i = izero; i < m; i++) {
01293                 B[j*ldb+i] *= alpha;
01294             }
01295           }
01296           // Perform a backsolve for column j of B.
01297           for(k = (m - ione); k > -ione; k--) {
01298         // If this entry is zero, we don't have to do anything.
01299         if (B[j*ldb + k] != zero) {
01300             if (NoUnit) {
01301           B[j*ldb + k] /= A[k*lda + k];
01302             }
01303             for(i = izero; i < k; i++) {
01304           B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01305             }
01306         }
01307           }
01308       }
01309         }
01310         else 
01311         { // A is lower triangular.
01312                         for(j = izero; j < n; j++) {
01313                             // Perform alpha*B if alpha is not 1.
01314                             if(alpha != one) {
01315                                 for( i = izero; i < m; i++) {
01316                                     B[j*ldb+i] *= alpha;
01317                                 }
01318                             }
01319                             // Perform a forward solve for column j of B.
01320                             for(k = izero; k < m; k++) {
01321                                 // If this entry is zero, we don't have to do anything.
01322                                 if (B[j*ldb + k] != zero) {   
01323                                     if (NoUnit) {
01324                                         B[j*ldb + k] /= A[k*lda + k];
01325                                     }
01326                                     for(i = k+ione; i < m; i++) {
01327                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01328                                     }
01329                                 }
01330                             }
01331                         }
01332         } // end if (uplo == 'U')
01333     }  // if (transa =='N') 
01334         else { 
01335         //
01336         //  Compute B = alpha*inv( A' )*B
01337         //
01338         if(EUploChar[uplo] == 'U') { 
01339       // A is upper triangular.
01340       for(j = izero; j < n; j++) {
01341                   for( i = izero; i < m; i++) {
01342             temp = alpha*B[j*ldb+i];
01343             for(k = izero; k < i; k++) {
01344             temp -= A[i*lda + k] * B[j*ldb + k];
01345         }
01346         if (NoUnit) {
01347             temp /= A[i*lda + i];
01348         }
01349         B[j*ldb + i] = temp;
01350           }
01351       }
01352         }
01353         else
01354         { // A is lower triangular.
01355                         for(j = izero; j < n; j++) {
01356                             for(i = (m - ione) ; i > -ione; i--) {
01357                                 temp = alpha*B[j*ldb+i];
01358                               for(k = i+ione; k < m; k++) {
01359             temp -= A[i*lda + k] * B[j*ldb + k];
01360         }
01361         if (NoUnit) {
01362             temp /= A[i*lda + i];
01363         }
01364         B[j*ldb + i] = temp;
01365                             }
01366                         }
01367         }
01368     }
01369       }  // if (side == 'L')
01370       else { 
01371          // side == 'R'
01372          //
01373          // Perform computations for X*OP(A) = alpha*B      
01374          //
01375         if (ETranspChar[transa] == 'N') {
01376         //
01377         //  Compute B = alpha*B*inv( A )
01378         //
01379         if(EUploChar[uplo] == 'U') { 
01380       // A is upper triangular.
01381           // Perform a backsolve for column j of B.
01382       for(j = izero; j < n; j++) {
01383               // Perform alpha*B if alpha is not 1.
01384               if(alpha != one) {
01385                 for( i = izero; i < m; i++) {
01386                 B[j*ldb+i] *= alpha;
01387             }
01388           }
01389           for(k = izero; k < j; k++) {
01390         // If this entry is zero, we don't have to do anything.
01391         if (A[j*lda + k] != zero) {
01392             for(i = izero; i < m; i++) {
01393           B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01394             }
01395         }
01396           }
01397           if (NoUnit) {
01398         temp = one/A[j*lda + j];
01399         for(i = izero; i < m; i++) {
01400             B[j*ldb + i] *= temp;
01401         }
01402           }
01403       }
01404         }
01405         else 
01406         { // A is lower triangular.
01407                         for(j = (n - ione); j > -ione; j--) {
01408                             // Perform alpha*B if alpha is not 1.
01409                             if(alpha != one) {
01410                                 for( i = izero; i < m; i++) {
01411                                     B[j*ldb+i] *= alpha;
01412                                 }
01413                             }
01414                             // Perform a forward solve for column j of B.
01415                             for(k = j+ione; k < n; k++) {
01416                                 // If this entry is zero, we don't have to do anything.
01417         if (A[j*lda + k] != zero) {
01418             for(i = izero; i < m; i++) {
01419                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 
01420                                     }
01421                                 } 
01422                             }
01423           if (NoUnit) {
01424         temp = one/A[j*lda + j];
01425         for(i = izero; i < m; i++) {
01426             B[j*ldb + i] *= temp;
01427         }
01428           }     
01429                         }
01430         } // end if (uplo == 'U')
01431     }  // if (transa =='N') 
01432         else { 
01433         //
01434         //  Compute B = alpha*B*inv( A' )
01435         //
01436         if(EUploChar[uplo] == 'U') { 
01437       // A is upper triangular.
01438       for(k = (n - ione); k > -ione; k--) {
01439           if (NoUnit) {
01440         temp = one/A[k*lda + k];
01441                     for(i = izero; i < m; i++) {
01442                 B[k*ldb + i] *= temp;
01443         }
01444           }
01445           for(j = izero; j < k; j++) {
01446         if (A[k*lda + j] != zero) {
01447             temp = A[k*lda + j];
01448             for(i = izero; i < m; i++) {
01449           B[j*ldb + i] -= temp*B[k*ldb + i];
01450             }
01451         }
01452           }
01453           if (alpha != one) {
01454         for (i = izero; i < m; i++) {
01455             B[k*ldb + i] *= alpha;
01456         }
01457           }
01458       }
01459         }
01460         else
01461         { // A is lower triangular.
01462       for(k = izero; k < n; k++) {
01463           if (NoUnit) {
01464         temp = one/A[k*lda + k];
01465         for (i = izero; i < m; i++) {
01466             B[k*ldb + i] *= temp;
01467         }
01468           }
01469           for(j = k+ione; j < n; j++) {
01470         if(A[k*lda + j] != zero) {
01471             temp = A[k*lda + j];
01472             for(i = izero; i < m; i++) {
01473           B[j*ldb + i] -= temp*B[k*ldb + i];
01474             }
01475         }
01476           }
01477           if (alpha != one) {
01478         for (i = izero; i < m; i++) {
01479             B[k*ldb + i] *= alpha;
01480         }
01481           }
01482                         }
01483         }
01484     }   
01485       }
01486   }
01487     }
01488   }
01489   
01490 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01491 
01492 #ifdef HAVE_TEUCHOS_BLASFLOAT
01493 
01494   template<typename OrdinalType>
01495   class BLAS<OrdinalType, float>
01496   {    
01497   public:
01498     inline BLAS(void) {}
01499     inline BLAS(const BLAS<OrdinalType, float>& BLAS_source) {}
01500     inline virtual ~BLAS(void) {}
01501     void ROTG(float* da, float* db, float* c, float* s) const;
01502     void ROT(const OrdinalType n, float* dx, const OrdinalType incx, float* dy, const OrdinalType incy, float* c, float* s) const;
01503     float ASUM(const OrdinalType n, const float* x, const OrdinalType incx) const;
01504     void AXPY(const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const;
01505     void COPY(const OrdinalType n, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const;
01506     float DOT(const OrdinalType n, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy) const;
01507     float NRM2(const OrdinalType n, const float* x, const OrdinalType incx) const;
01508     void SCAL(const OrdinalType n, const float alpha, float* x, const OrdinalType incx) const;
01509     OrdinalType IAMAX(const OrdinalType n, const float* x, const OrdinalType incx) const;
01510     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* x, const OrdinalType incx, const float beta, float* y, const OrdinalType incy) const;
01511     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const float* A, const OrdinalType lda, float* x, const OrdinalType incx) const;
01512     void GER(const OrdinalType m, const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy, float* A, const OrdinalType lda) const;
01513     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const;
01514     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float *B, const OrdinalType ldb, const float beta, float *C, const OrdinalType ldc) const;
01515     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const;
01516     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const;
01517   };
01518 
01519   template<typename OrdinalType>
01520   void BLAS<OrdinalType, float>::ROTG(float* da, float* db, float* c, float* s) const
01521   { SROTG_F77(da, db, c, s ); }
01522 
01523   template<typename OrdinalType>
01524   void BLAS<OrdinalType, float>::ROT(const OrdinalType n, float* dx, const OrdinalType incx, float* dy, const OrdinalType incy, float* c, float* s) const
01525   { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
01526 
01527   template<typename OrdinalType>
01528   float BLAS<OrdinalType, float>::ASUM(const OrdinalType n, const float* x, const OrdinalType incx) const
01529   {
01530     float tmp = SASUM_F77(&n, x, &incx);
01531     return tmp;
01532   }
01533   
01534   template<typename OrdinalType>
01535   void BLAS<OrdinalType, float>::AXPY(const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const
01536   { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01537   
01538   template<typename OrdinalType>
01539   void BLAS<OrdinalType, float>::COPY(const OrdinalType n, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const 
01540   { SCOPY_F77(&n, x, &incx, y, &incy); }
01541   
01542   template<typename OrdinalType>
01543   float BLAS<OrdinalType, float>::DOT(const OrdinalType n, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy) const
01544   { return SDOT_F77(&n, x, &incx, y, &incy); }
01545   
01546   template<typename OrdinalType>
01547   OrdinalType BLAS<OrdinalType, float>::IAMAX(const OrdinalType n, const float* x, const OrdinalType incx) const
01548   { return ISAMAX_F77(&n, x, &incx); }
01549 
01550   template<typename OrdinalType>
01551   float BLAS<OrdinalType, float>::NRM2(const OrdinalType n, const float* x, const OrdinalType incx) const
01552   { return SNRM2_F77(&n, x, &incx); }
01553   
01554   template<typename OrdinalType>
01555   void BLAS<OrdinalType, float>::SCAL(const OrdinalType n, const float alpha, float* x, const OrdinalType incx) const
01556   { SSCAL_F77(&n, &alpha, x, &incx); }
01557   
01558   template<typename OrdinalType>
01559   void BLAS<OrdinalType, float>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* x, const OrdinalType incx, const float beta, float* y, const OrdinalType incy) const
01560   { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01561   
01562   template<typename OrdinalType>
01563   void BLAS<OrdinalType, float>::GER(const OrdinalType m, const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy, float* A, const OrdinalType lda) const
01564   { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01565 
01566   template<typename OrdinalType>
01567   void BLAS<OrdinalType, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const float* A, const OrdinalType lda, float* x, const OrdinalType incx) const
01568   { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01569   
01570   template<typename OrdinalType>
01571   void BLAS<OrdinalType, float>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const
01572   { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01573   
01574   template<typename OrdinalType>
01575   void BLAS<OrdinalType, float>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const
01576   { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01577   
01578   template<typename OrdinalType>
01579   void BLAS<OrdinalType, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const
01580   { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01581   
01582   template<typename OrdinalType>
01583   void BLAS<OrdinalType, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const
01584   { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01585 
01586 #endif // HAVE_TEUCHOS_BLASFLOAT
01587 
01588   template<typename OrdinalType>
01589   class BLAS<OrdinalType, double>
01590   {    
01591   public:
01592     inline BLAS(void) {}
01593     inline BLAS(const BLAS<OrdinalType, double>& BLAS_source) {}
01594     inline virtual ~BLAS(void) {}
01595     void ROTG(double* da, double* db, double* c, double* s) const;
01596     void ROT(const OrdinalType n, double* dx, const OrdinalType incx, double* dy, const OrdinalType incy, double* c, double* s) const;
01597     double ASUM(const OrdinalType n, const double* x, const OrdinalType incx) const;
01598     void AXPY(const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const;
01599     void COPY(const OrdinalType n, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const;
01600     double DOT(const OrdinalType n, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy) const;
01601     double NRM2(const OrdinalType n, const double* x, const OrdinalType incx) const;
01602     void SCAL(const OrdinalType n, const double alpha, double* x, const OrdinalType incx) const;
01603     OrdinalType IAMAX(const OrdinalType n, const double* x, const OrdinalType incx) const;
01604     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double* x, const OrdinalType incx, const double beta, double* y, const OrdinalType incy) const;
01605     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const double* A, const OrdinalType lda, double* x, const OrdinalType incx) const;
01606     void GER(const OrdinalType m, const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy, double* A, const OrdinalType lda) const;
01607     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const double alpha, const double* A, const OrdinalType lda, const double* B, const OrdinalType ldb, const double beta, double* C, const OrdinalType ldc) const;
01608     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double *B, const OrdinalType ldb, const double beta, double *C, const OrdinalType ldc) const;
01609     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const;
01610     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const;
01611   };
01612   
01613   template<typename OrdinalType>
01614   void BLAS<OrdinalType, double>::ROTG(double* da, double* db, double* c, double* s) const
01615   { DROTG_F77(da, db, c, s); }
01616 
01617   template<typename OrdinalType>
01618   void BLAS<OrdinalType, double>::ROT(const OrdinalType n, double* dx, const OrdinalType incx, double* dy, const OrdinalType incy, double* c, double* s) const
01619   { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
01620 
01621   template<typename OrdinalType>
01622   double BLAS<OrdinalType, double>::ASUM(const OrdinalType n, const double* x, const OrdinalType incx) const
01623   { return DASUM_F77(&n, x, &incx); }
01624   
01625   template<typename OrdinalType>
01626   void BLAS<OrdinalType, double>::AXPY(const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const
01627   { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01628   
01629   template<typename OrdinalType>
01630   void BLAS<OrdinalType, double>::COPY(const OrdinalType n, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const
01631   { DCOPY_F77(&n, x, &incx, y, &incy); }
01632   
01633   template<typename OrdinalType>
01634   double BLAS<OrdinalType, double>::DOT(const OrdinalType n, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy) const
01635   { return DDOT_F77(&n, x, &incx, y, &incy); }
01636   
01637   template<typename OrdinalType>
01638   OrdinalType BLAS<OrdinalType, double>::IAMAX(const OrdinalType n, const double* x, const OrdinalType incx) const
01639   { return IDAMAX_F77(&n, x, &incx); }
01640 
01641   template<typename OrdinalType>
01642   double BLAS<OrdinalType, double>::NRM2(const OrdinalType n, const double* x, const OrdinalType incx) const
01643   { return DNRM2_F77(&n, x, &incx); }
01644   
01645   template<typename OrdinalType>
01646   void BLAS<OrdinalType, double>::SCAL(const OrdinalType n, const double alpha, double* x, const OrdinalType incx) const
01647   { DSCAL_F77(&n, &alpha, x, &incx); }
01648   
01649   template<typename OrdinalType>
01650   void BLAS<OrdinalType, double>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double* x, const OrdinalType incx, const double beta, double* y, const OrdinalType incy) const
01651   { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01652   
01653   template<typename OrdinalType>
01654   void BLAS<OrdinalType, double>::GER(const OrdinalType m, const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy, double* A, const OrdinalType lda) const
01655   { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01656 
01657   template<typename OrdinalType>
01658   void BLAS<OrdinalType, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const double* A, const OrdinalType lda, double* x, const OrdinalType incx) const
01659   { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01660   
01661   template<typename OrdinalType>
01662   void BLAS<OrdinalType, double>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const double alpha, const double* A, const OrdinalType lda, const double* B, const OrdinalType ldb, const double beta, double* C, const OrdinalType ldc) const
01663   { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01664   
01665   template<typename OrdinalType>
01666   void BLAS<OrdinalType, double>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double *B, const OrdinalType ldb, const double beta, double *C, const OrdinalType ldc) const
01667   { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01668   
01669   template<typename OrdinalType>
01670   void BLAS<OrdinalType, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const
01671   { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01672 
01673   template<typename OrdinalType>
01674   void BLAS<OrdinalType, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const
01675   { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01676   
01677 #ifdef HAVE_TEUCHOS_COMPLEX
01678 
01679 #ifdef HAVE_TEUCHOS_BLASFLOAT
01680 
01681   template<typename OrdinalType>
01682   class BLAS<OrdinalType, std::complex<float> >
01683   {    
01684   public:
01685     inline BLAS(void) {}
01686     inline BLAS(const BLAS<OrdinalType, std::complex<float> >& BLAS_source) {}
01687     inline virtual ~BLAS(void) {}
01688     void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
01689     void ROT(const OrdinalType n, std::complex<float>* dx, const OrdinalType incx, std::complex<float>* dy, const OrdinalType incy, float* c, std::complex<float>* s) const;
01690     float ASUM(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const;
01691     void AXPY(const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* x, const OrdinalType incx, std::complex<float>* y, const OrdinalType incy) const;
01692     void COPY(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx, std::complex<float>* y, const OrdinalType incy) const;
01693     std::complex<float> DOT(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx, const std::complex<float>* y, const OrdinalType incy) const;
01694     float NRM2(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const;
01695     void SCAL(const OrdinalType n, const std::complex<float> alpha, std::complex<float>* x, const OrdinalType incx) const;
01696     OrdinalType IAMAX(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const;
01697     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float>* x, const OrdinalType incx, const std::complex<float> beta, std::complex<float>* y, const OrdinalType incy) const;
01698     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* x, const OrdinalType incx) const;
01699     void GER(const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* x, const OrdinalType incx, const std::complex<float>* y, const OrdinalType incy, std::complex<float>* A, const OrdinalType lda) const;
01700     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float>* B, const OrdinalType ldb, const std::complex<float> beta, std::complex<float>* C, const OrdinalType ldc) const;
01701     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float> *B, const OrdinalType ldb, const std::complex<float> beta, std::complex<float> *C, const OrdinalType ldc) const;
01702     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* B, const OrdinalType ldb) const;
01703     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* B, const OrdinalType ldb) const;
01704   };
01705 
01706   template<typename OrdinalType>
01707   void BLAS<OrdinalType, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
01708   { CROTG_F77(da, db, c, s ); }
01709 
01710   template<typename OrdinalType>
01711   void BLAS<OrdinalType, std::complex<float> >::ROT(const OrdinalType n, std::complex<float>* dx, const OrdinalType incx, std::complex<float>* dy, const OrdinalType incy, float* c, std::complex<float>* s) const
01712   { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
01713 
01714   template<typename OrdinalType>
01715   float BLAS<OrdinalType, std::complex<float> >::ASUM(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const
01716   { return CASUM_F77(&n, x, &incx); }
01717   
01718   template<typename OrdinalType>
01719   void BLAS<OrdinalType, std::complex<float> >::AXPY(const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* x, const OrdinalType incx, std::complex<float>* y, const OrdinalType incy) const
01720   { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01721   
01722   template<typename OrdinalType>
01723   void BLAS<OrdinalType, std::complex<float> >::COPY(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx, std::complex<float>* y, const OrdinalType incy) const
01724   { CCOPY_F77(&n, x, &incx, y, &incy); }
01725   
01726   template<typename OrdinalType>
01727   std::complex<float> BLAS<OrdinalType, std::complex<float> >::DOT(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx, const std::complex<float>* y, const OrdinalType incy) const
01728   { return CDOT_F77(&n, x, &incx, y, &incy); }
01729   
01730   template<typename OrdinalType>
01731   OrdinalType BLAS<OrdinalType, std::complex<float> >::IAMAX(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const
01732   { return ICAMAX_F77(&n, x, &incx); }
01733 
01734   template<typename OrdinalType>
01735   float BLAS<OrdinalType, std::complex<float> >::NRM2(const OrdinalType n, const std::complex<float>* x, const OrdinalType incx) const
01736   { return CNRM2_F77(&n, x, &incx); }
01737   
01738   template<typename OrdinalType>
01739   void BLAS<OrdinalType, std::complex<float> >::SCAL(const OrdinalType n, const std::complex<float> alpha, std::complex<float>* x, const OrdinalType incx) const
01740   { CSCAL_F77(&n, &alpha, x, &incx); }
01741   
01742   template<typename OrdinalType>
01743   void BLAS<OrdinalType, std::complex<float> >::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float>* x, const OrdinalType incx, const std::complex<float> beta, std::complex<float>* y, const OrdinalType incy) const
01744   { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01745   
01746   template<typename OrdinalType>
01747   void BLAS<OrdinalType, std::complex<float> >::GER(const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* x, const OrdinalType incx, const std::complex<float>* y, const OrdinalType incy, std::complex<float>* A, const OrdinalType lda) const
01748   { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01749 
01750   template<typename OrdinalType>
01751   void BLAS<OrdinalType, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* x, const OrdinalType incx) const
01752   { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01753   
01754   template<typename OrdinalType>
01755   void BLAS<OrdinalType, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float>* B, const OrdinalType ldb, const std::complex<float> beta, std::complex<float>* C, const OrdinalType ldc) const
01756   { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 
01757  
01758   template<typename OrdinalType>
01759   void BLAS<OrdinalType, std::complex<float> >::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, const std::complex<float>* B, const OrdinalType ldb, const std::complex<float> beta, std::complex<float>* C, const OrdinalType ldc) const
01760   { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01761   
01762   template<typename OrdinalType>
01763   void BLAS<OrdinalType, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* B, const OrdinalType ldb) const
01764   { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01765   
01766   template<typename OrdinalType>
01767   void BLAS<OrdinalType, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<float> alpha, const std::complex<float>* A, const OrdinalType lda, std::complex<float>* B, const OrdinalType ldb) const
01768   { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01769 
01770 #endif // HAVE_TEUCHOS_BLASFLOAT
01771 
01772   template<typename OrdinalType>
01773   class BLAS<OrdinalType, std::complex<double> >
01774   {    
01775   public:
01776     inline BLAS(void) {}
01777     inline BLAS(const BLAS<OrdinalType, std::complex<double> >& BLAS_source) {}
01778     inline virtual ~BLAS(void) {}
01779     void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
01780     void ROT(const OrdinalType n, std::complex<double>* dx, const OrdinalType incx, std::complex<double>* dy, const OrdinalType incy, double* c, std::complex<double>* s) const;
01781     double ASUM(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const;
01782     void AXPY(const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* x, const OrdinalType incx, std::complex<double>* y, const OrdinalType incy) const;
01783     void COPY(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx, std::complex<double>* y, const OrdinalType incy) const;
01784     std::complex<double> DOT(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx, const std::complex<double>* y, const OrdinalType incy) const;
01785     double NRM2(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const;
01786     void SCAL(const OrdinalType n, const std::complex<double> alpha, std::complex<double>* x, const OrdinalType incx) const;
01787     OrdinalType IAMAX(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const;
01788     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double>* x, const OrdinalType incx, const std::complex<double> beta, std::complex<double>* y, const OrdinalType incy) const;
01789     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* x, const OrdinalType incx) const;
01790     void GER(const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* x, const OrdinalType incx, const std::complex<double>* y, const OrdinalType incy, std::complex<double>* A, const OrdinalType lda) const;
01791     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double>* B, const OrdinalType ldb, const std::complex<double> beta, std::complex<double>* C, const OrdinalType ldc) const;
01792     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double> *B, const OrdinalType ldb, const std::complex<double> beta, std::complex<double> *C, const OrdinalType ldc) const;
01793     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* B, const OrdinalType ldb) const;
01794     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* B, const OrdinalType ldb) const;
01795   };
01796   
01797   template<typename OrdinalType>
01798   void BLAS<OrdinalType, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
01799   { ZROTG_F77(da, db, c, s); }
01800 
01801   template<typename OrdinalType>
01802   void BLAS<OrdinalType, std::complex<double> >::ROT(const OrdinalType n, std::complex<double>* dx, const OrdinalType incx, std::complex<double>* dy, const OrdinalType incy, double* c, std::complex<double>* s) const
01803   { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
01804 
01805   template<typename OrdinalType>
01806   double BLAS<OrdinalType, std::complex<double> >::ASUM(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const
01807   { return ZASUM_F77(&n, x, &incx); }
01808   
01809   template<typename OrdinalType>
01810   void BLAS<OrdinalType, std::complex<double> >::AXPY(const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* x, const OrdinalType incx, std::complex<double>* y, const OrdinalType incy) const
01811   { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01812   
01813   template<typename OrdinalType>
01814   void BLAS<OrdinalType, std::complex<double> >::COPY(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx, std::complex<double>* y, const OrdinalType incy) const
01815   { ZCOPY_F77(&n, x, &incx, y, &incy); }
01816   
01817   template<typename OrdinalType>
01818   std::complex<double> BLAS<OrdinalType, std::complex<double> >::DOT(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx, const std::complex<double>* y, const OrdinalType incy) const
01819   { return ZDOT_F77(&n, x, &incx, y, &incy); }
01820   
01821   template<typename OrdinalType>
01822   OrdinalType BLAS<OrdinalType, std::complex<double> >::IAMAX(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const
01823   { return IZAMAX_F77(&n, x, &incx); }
01824 
01825   template<typename OrdinalType>
01826   double BLAS<OrdinalType, std::complex<double> >::NRM2(const OrdinalType n, const std::complex<double>* x, const OrdinalType incx) const
01827   { return ZNRM2_F77(&n, x, &incx); }
01828   
01829   template<typename OrdinalType>
01830   void BLAS<OrdinalType, std::complex<double> >::SCAL(const OrdinalType n, const std::complex<double> alpha, std::complex<double>* x, const OrdinalType incx) const
01831   { ZSCAL_F77(&n, &alpha, x, &incx); }
01832   
01833   template<typename OrdinalType>
01834   void BLAS<OrdinalType, std::complex<double> >::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double>* x, const OrdinalType incx, const std::complex<double> beta, std::complex<double>* y, const OrdinalType incy) const
01835   { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01836   
01837   template<typename OrdinalType>
01838   void BLAS<OrdinalType, std::complex<double> >::GER(const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* x, const OrdinalType incx, const std::complex<double>* y, const OrdinalType incy, std::complex<double>* A, const OrdinalType lda) const
01839   { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01840 
01841   template<typename OrdinalType>
01842   void BLAS<OrdinalType, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* x, const OrdinalType incx) const
01843   { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01844   
01845   template<typename OrdinalType>
01846   void BLAS<OrdinalType, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double>* B, const OrdinalType ldb, const std::complex<double> beta, std::complex<double>* C, const OrdinalType ldc) const
01847   { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01848   
01849   template<typename OrdinalType>
01850   void BLAS<OrdinalType, std::complex<double> >::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, const std::complex<double> *B, const OrdinalType ldb, const std::complex<double> beta, std::complex<double> *C, const OrdinalType ldc) const
01851   { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01852   
01853   template<typename OrdinalType>
01854   void BLAS<OrdinalType, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* B, const OrdinalType ldb) const
01855   { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01856 
01857   template<typename OrdinalType>
01858   void BLAS<OrdinalType, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const std::complex<double> alpha, const std::complex<double>* A, const OrdinalType lda, std::complex<double>* B, const OrdinalType ldb) const
01859   { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01860   
01861 #endif // HAVE_TEUCHOS_COMPLEX
01862 
01863 #endif // DOXYGEN_SHOULD_SKIP_THIS
01864 
01865 } // namespace Teuchos
01866 
01867 #endif // _TEUCHOS_BLAS_HPP_

Generated on Tue Oct 20 12:45:25 2009 for Teuchos - Trilinos Tools Package by doxygen 1.4.7