Teuchos_BLAS.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 // Kris
00030 // 06.16.03 -- Start over from scratch
00031 // 06.16.03 -- Initial templatization (Tpetra_BLAS.cpp is no longer needed)
00032 // 06.18.03 -- Changed xxxxx_() function calls to XXXXX_F77()
00033 //          -- Added warning messages for generic calls
00034 // 07.08.03 -- Move into Teuchos package/namespace
00035 // 07.24.03 -- The first iteration of BLAS generics is nearing completion. Caveats:
00036 //             * TRSM isn't finished yet; it works for one case at the moment (left side, upper tri., no transpose, no unit diag.)
00037 //             * Many of the generic implementations are quite inefficient, ugly, or both. I wrote these to be easy to debug, not for efficiency or legibility. The next iteration will improve both of these aspects as much as possible.
00038 //             * Very little verification of input parameters is done, save for the character-type arguments (TRANS, etc.) which is quite robust.
00039 //             * All of the routines that make use of both an incx and incy parameter (which includes much of the L1 BLAS) are set up to work iff incx == incy && incx > 0. Allowing for differing/negative values of incx/incy should be relatively trivial.
00040 //             * All of the L2/L3 routines assume that the entire matrix is being used (that is, if A is mxn, lda = m); they don't work on submatrices yet. This *should* be a reasonably trivial thing to fix, as well.
00041 //          -- Removed warning messages for generic calls
00042 // 08.08.03 -- TRSM now works for all cases where SIDE == L and DIAG == N. DIAG == U is implemented but does not work correctly; SIDE == R is not yet implemented.
00043 // 08.14.03 -- TRSM now works for all cases and accepts (and uses) leading-dimension information.
00044 // 09.26.03 -- character input replaced with enumerated input to cause compiling errors and not run-time errors ( suggested by RAB ).
00045 
00046 #ifndef _TEUCHOS_BLAS_HPP_
00047 #define _TEUCHOS_BLAS_HPP_
00048 
00056 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00057 the appropriate declaration of one will need to be added back into
00058 functions that include the macro:
00059 */
00060 #if defined (INTEL_CXML)
00061         unsigned int one=1;
00062 #endif
00063 
00064 #ifdef CHAR_MACRO
00065 #undef CHAR_MACRO
00066 #endif
00067 #if defined (INTEL_CXML)
00068 #define CHAR_MACRO(char_var) &char_var, one
00069 #else
00070 #define CHAR_MACRO(char_var) &char_var
00071 #endif
00072 
00073 #include "Teuchos_ConfigDefs.hpp"
00074 #include "Teuchos_BLAS_wrappers.hpp"
00075 #include "Teuchos_BLAS_types.hpp"
00076 #include "Teuchos_ScalarTraits.hpp"
00077 #include "Teuchos_OrdinalTraits.hpp"
00078 
00112 namespace Teuchos
00113 {
00114   extern const char ESideChar[];
00115   extern const char ETranspChar[];
00116   extern const char EUploChar[];
00117   extern const char EDiagChar[];
00118 
00119   template<typename OrdinalType, typename ScalarType>
00120   class BLAS
00121   {    
00122   public:
00124 
00126     inline BLAS(void) {};
00127 
00129     inline BLAS(const BLAS<OrdinalType, ScalarType>& BLAS_source) {};
00130 
00132     inline virtual ~BLAS(void) {};
00134 
00136 
00138     void ROTG(ScalarType* da, ScalarType* db, ScalarType* c, ScalarType* s) const;
00139 
00141     void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00142 
00144     void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00145 
00147     void AXPY(const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00148 
00150     ScalarType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00151 
00153     ScalarType DOT(const OrdinalType n, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy) const;
00154 
00156     ScalarType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00157 
00159     OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00160 
00162 
00164 
00166     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, 
00167         const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const;
00168 
00170     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A, 
00171         const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00172 
00174     void GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, 
00175        const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00177     
00179 
00181     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
00182 
00184     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const;
00185 
00187     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00188                 const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00189 
00191     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00192                 const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00194   };
00195 
00196 //------------------------------------------------------------------------------------------
00197 //      LEVEL 1 BLAS ROUTINES  
00198 //------------------------------------------------------------------------------------------
00199     
00200   template<typename OrdinalType, typename ScalarType>
00201   void BLAS<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, ScalarType* c, ScalarType* s) const
00202   {
00203     ScalarType roe, scale, r;
00204     ScalarType zero = ScalarTraits<ScalarType>::zero();
00205     ScalarType one = ScalarTraits<ScalarType>::one();
00206 
00207     if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
00208     scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
00209     if ( scale == zero ) // There is nothing to do.
00210     {
00211       *c = one;
00212       *s = zero;
00213       *da = zero; *db = zero;
00214     } else { // Compute the Givens rotation.
00215       r = scale*ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) );
00216       if ( roe < zero ) { r *= -one; }
00217       *c = *da / r;
00218       *s = *db / r;
00219       *db = ScalarTraits<ScalarType>::one();
00220       if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
00221       if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
00222      *c != ScalarTraits<ScalarType>::zero() ) { *db = one / *c; }
00223       *da = r;
00224     }
00225   } /* end ROTG */
00226       
00227   template<typename OrdinalType, typename ScalarType>
00228   void BLAS<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00229   {
00230     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00231     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00232     OrdinalType i, ix = izero;
00233     if ( n > izero ) {
00234         // Set the initial index (ix).
00235         if (incx < izero) { ix = (-n+ione)*incx; } 
00236         // Scale the vector.
00237         for(i = izero; i < n; i++)
00238         {
00239             x[ix] *= alpha;
00240             ix += incx;
00241         }
00242     }
00243   } /* end SCAL */
00244 
00245   template<typename OrdinalType, typename ScalarType>
00246   void BLAS<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00247   {
00248     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00249     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00250     OrdinalType i, ix = izero, iy = izero;
00251     if ( n > izero ) {
00252   // Set the initial indices (ix, iy).
00253       if (incx < izero) { ix = (-n+ione)*incx; }
00254       if (incy < izero) { iy = (-n+ione)*incy; }
00255 
00256         for(i = izero; i < n; i++)
00257           {
00258       y[iy] = x[ix];
00259       ix += incx;
00260       iy += incy;
00261           }
00262       }
00263   } /* end COPY */
00264 
00265   template<typename OrdinalType, typename ScalarType>
00266   void BLAS<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00267   {
00268     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00269     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00270     OrdinalType i, ix = izero, iy = izero;
00271     if( n > izero && alpha != ScalarTraits<ScalarType>::zero())
00272       {
00273   // Set the initial indices (ix, iy).
00274       if (incx < izero) { ix = (-n+ione)*incx; }
00275       if (incy < izero) { iy = (-n+ione)*incy; }
00276 
00277         for(i = izero; i < n; i++)
00278           {
00279       y[iy] += alpha * x[ix];
00280       ix += incx;
00281       iy += incy;
00282           }
00283       }
00284   } /* end AXPY */
00285 
00286   template<typename OrdinalType, typename ScalarType>
00287   ScalarType BLAS<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00288   {
00289     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00290     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00291     ScalarType result = ScalarTraits<ScalarType>::zero();
00292     OrdinalType i, ix = izero;
00293     if( n > izero ) {
00294   // Set the initial indices
00295   if (incx < izero) { ix = (-n+ione)*incx; }
00296 
00297       for(i = izero; i < n; i++)
00298           {
00299       result += ScalarTraits<ScalarType>::magnitude(x[ix]);
00300       ix += incx;
00301           }
00302     } 
00303    return result;
00304   } /* end ASUM */
00305   
00306   template<typename OrdinalType, typename ScalarType>
00307   ScalarType BLAS<OrdinalType, ScalarType>::DOT(const OrdinalType n, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy) const
00308   {
00309     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00310     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00311     ScalarType result = ScalarTraits<ScalarType>::zero();
00312     OrdinalType i, ix = izero, iy = izero;
00313     if( n > izero ) 
00314       {
00315   // Set the initial indices (ix,iy).       
00316   if (incx < izero) { ix = (-n+ione)*incx; }
00317   if (incy < izero) { iy = (-n+ione)*incy; }
00318 
00319   for(i = izero; i < n; i++)
00320     {
00321       result += ScalarTraits<ScalarType>::conjugate(x[ix]) * y[iy];
00322       ix += incx;
00323       iy += incy;
00324     }
00325       }
00326     return result;
00327   } /* end DOT */
00328   
00329   template<typename OrdinalType, typename ScalarType>
00330   ScalarType BLAS<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00331   {
00332     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00333     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00334     ScalarType result = ScalarTraits<ScalarType>::zero();
00335     OrdinalType i, ix = izero;
00336     if ( n > izero ) 
00337       {
00338   // Set the initial index.
00339   if (incx < izero) { ix = (-n+ione)*incx; }  
00340     
00341   for(i = izero; i < n; i++)
00342           {
00343       result += ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix];
00344       ix += incx;
00345           }
00346       result = ScalarTraits<ScalarType>::squareroot(result);
00347       } 
00348     return result;
00349   } /* end NRM2 */
00350   
00351   template<typename OrdinalType, typename ScalarType>
00352   OrdinalType BLAS<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00353   {
00354     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00355     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00356     OrdinalType result = izero, ix = izero, i;
00357     ScalarType maxval;
00358 
00359     if ( n > izero ) 
00360       {
00361   if (incx < izero) { ix = (-n+ione)*incx; }
00362   maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00363   ix += incx;
00364       for(i = ione; i < n; i++)
00365           {
00366       if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval)
00367         {
00368         result = i;
00369           maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00370         }
00371       ix += incx;
00372     }
00373       }
00374     return result + 1; // the BLAS I?AMAX functions return 1-indexed (Fortran-style) values
00375   } /* end IAMAX */
00376 
00377 //------------------------------------------------------------------------------------------
00378 //      LEVEL 2 BLAS ROUTINES
00379 //------------------------------------------------------------------------------------------
00380 
00381   template<typename OrdinalType, typename ScalarType>
00382   void BLAS<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* x, const OrdinalType incx, const ScalarType beta, ScalarType* y, const OrdinalType incy) const
00383   {
00384     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00385     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00386     ScalarType zero = ScalarTraits<ScalarType>::zero();
00387     ScalarType one = ScalarTraits<ScalarType>::one();
00388     bool BadArgument = false;
00389 
00390     // Quick return if there is nothing to do!
00391     if( m == izero || n == izero || ( alpha == zero && beta == one ) ){ return; }
00392     
00393     // Otherwise, we need to check the argument list.
00394     if( m < izero ) { 
00395   cout << "BLAS::GEMV Error: M == " << m << endl;     
00396   BadArgument = true;
00397     }
00398     if( n < izero ) { 
00399   cout << "BLAS::GEMV Error: N == " << n << endl;     
00400   BadArgument = true;
00401     }
00402     if( lda < m ) { 
00403   cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< endl;      
00404   BadArgument = true;
00405     }
00406     if( incx == izero ) {
00407   cout << "BLAS::GEMV Error: INCX == 0"<< endl;
00408   BadArgument = true;
00409     }
00410     if( incy == izero ) {
00411   cout << "BLAS::GEMV Error: INCY == 0"<< endl;
00412   BadArgument = true;
00413     }
00414 
00415     if(!BadArgument) {
00416       OrdinalType i, j, lenx, leny, ix, iy, jx, jy; 
00417       OrdinalType kx = izero, ky = izero;
00418       ScalarType temp;
00419 
00420       // Determine the lengths of the vectors x and y.
00421       if(ETranspChar[trans] == 'N') {
00422   lenx = n;
00423   leny = m;
00424       } else {
00425   lenx = m;
00426   leny = n;
00427       }
00428 
00429       // Set the starting pointers for the vectors x and y if incx/y < 0.
00430       if (incx < izero ) { kx =  (ione - lenx)*incx; }
00431       if (incy < izero ) { ky =  (ione - leny)*incy; }
00432 
00433       // Form y = beta*y
00434       ix = kx; iy = ky;
00435       if(beta != one) {
00436   if (incy == ione) {
00437     if (beta == zero) {
00438       for(i = izero; i < leny; i++) { y[i] = zero; }
00439     } else {
00440       for(i = izero; i < leny; i++) { y[i] *= beta; }
00441     }
00442   } else {
00443     if (beta == zero) {
00444       for(i = izero; i < leny; i++) {
00445         y[iy] = zero;
00446         iy += incy;
00447       }
00448     } else {
00449       for(i = izero; i < leny; i++) {
00450         y[iy] *= beta;
00451         iy += incy;
00452       }
00453     }
00454   }
00455       }
00456   
00457       // Return if we don't have to do anything more.
00458       if(alpha == zero) { return; }
00459 
00460       if( ETranspChar[trans] == 'N' ) {
00461   // Form y = alpha*A*y
00462   jx = kx;
00463   if (incy == ione) {
00464     for(j = izero; j < n; j++) {
00465       if (x[jx] != zero) {
00466         temp = alpha*x[jx];
00467         for(i = izero; i < m; i++) {
00468     y[i] += temp*A[j*lda + i];
00469         }
00470       }
00471       jx += incx;
00472     }
00473   } else {
00474     for(j = izero; j < n; j++) {
00475       if (x[jx] != zero) {
00476         temp = alpha*x[jx];
00477         iy = ky;
00478         for(i = izero; i < m; i++) {
00479     y[iy] += temp*A[j*lda + i];
00480     iy += incy;
00481         }
00482       }
00483       jx += incx;
00484     }
00485   }
00486       } else {
00487   jy = ky;
00488   if (incx == ione) {
00489     for(j = izero; j < n; j++) {
00490       temp = zero;
00491       for(i = izero; i < m; i++) {
00492         temp += A[j*lda + i]*x[i];
00493       }
00494       y[jy] += alpha*temp;
00495       jy += incy;
00496     }
00497   } else {
00498     for(j = izero; j < n; j++) {
00499       temp = zero;
00500       ix = kx;
00501       for (i = izero; i < m; i++) {
00502         temp += A[j*lda + i]*x[ix];
00503         ix += incx;
00504       }
00505       y[jy] += alpha*temp;
00506       jy += incy;
00507     }
00508   }
00509       }
00510     } /* if (!BadArgument) */
00511   } /* end GEMV */
00512 
00513  template<typename OrdinalType, typename ScalarType>
00514  void BLAS<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const ScalarType* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00515   {
00516     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00517     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00518     ScalarType zero = ScalarTraits<ScalarType>::zero();
00519     ScalarType one = ScalarTraits<ScalarType>::one();
00520     bool BadArgument = false;
00521 
00522     // Quick return if there is nothing to do!
00523     if( n == izero ){ return; }
00524     
00525     // Otherwise, we need to check the argument list.
00526     if( n < izero ) { 
00527       cout << "BLAS::TRMV Error: N == " << n << endl;     
00528       BadArgument = true;
00529     }
00530     if( lda < n ) { 
00531       cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< endl;      
00532       BadArgument = true;
00533     }
00534     if( incx == izero ) {
00535       cout << "BLAS::TRMV Error: INCX == 0"<< endl;
00536       BadArgument = true;
00537     }
00538 
00539     if(!BadArgument) {
00540       OrdinalType i, j, ix, jx, kx = izero;
00541       ScalarType temp;
00542       bool NoUnit = (EDiagChar[diag] == 'N');
00543 
00544       // Set the starting pointer for the vector x if incx < 0.
00545       if (incx < izero) { kx = (-n+ione)*incx; }
00546 
00547       // Start the operations for a nontransposed triangular matrix 
00548       if (ETranspChar[trans] == 'N') {
00549   /* Compute x = A*x */
00550   if (EUploChar[uplo] == 'U') {
00551     /* A is an upper triangular matrix */
00552     if (incx == ione) {
00553       for (j=izero; j<n; j++) {
00554         if (x[j] != zero) {
00555     temp = x[j];
00556     for (i=izero; i<j; i++) {
00557       x[i] += temp*A[j*lda + i];
00558     }
00559     if (NoUnit) 
00560       x[j] *= A[j*lda + j];
00561         }
00562       }
00563     } else {
00564       jx = kx;
00565       for (j=izero; j<n; j++) {
00566         if (x[jx] != zero) {
00567     temp = x[jx];
00568     ix = kx;
00569     for (i=izero; i<j; i++) {
00570       x[ix] += temp*A[j*lda + i];
00571       ix += incx;
00572     }
00573     if (NoUnit)
00574       x[jx] *= A[j*lda + j];
00575         }
00576         jx += incx;
00577       }
00578     } /* if (incx == ione) */
00579   } else { /* A is a lower triangular matrix */
00580     if (incx == ione) {
00581       for (j=n-ione; j>-ione; j--) {
00582         if (x[j] != zero) {
00583     temp = x[j];
00584     for (i=n-ione; i>j; i--) {
00585       x[i] += temp*A[j*lda + i];
00586     }
00587     if (NoUnit)
00588       x[j] *= A[j*lda + j];
00589         }
00590       }
00591     } else {
00592       kx += (n-ione)*incx;
00593       jx = kx;
00594       for (j=n-ione; j>-ione; j--) {
00595         if (x[jx] != zero) {
00596     temp = x[jx];
00597     ix = kx;
00598     for (i=n-ione; i>j; i--) {
00599       x[ix] += temp*A[j*lda + i];
00600       ix -= incx;
00601     }
00602     if (NoUnit) 
00603       x[jx] *= A[j*lda + j];
00604         }
00605         jx -= incx;
00606       }
00607     }
00608   } /* if (EUploChar[uplo]=='U') */
00609       } else { /* A is transposed/conjugated */
00610   /* Compute x = A'*x */
00611   if (EUploChar[uplo]=='U') {
00612     /* A is an upper triangular matrix */
00613     if (incx == ione) {
00614       for (j=n-ione; j>-ione; j--) {
00615         temp = x[j];
00616         if (NoUnit)
00617     temp *= A[j*lda + j];
00618         for (i=j-ione; i>-ione; i--) {
00619     temp += A[j*lda + i]*x[i];
00620         }
00621         x[j] = temp;
00622       }
00623     } else {
00624       jx = kx + (n-ione)*incx;
00625       for (j=n-ione; j>-ione; j--) {
00626         temp = x[jx];
00627         ix = jx;
00628         if (NoUnit)
00629     temp *= A[j*lda + j];
00630         for (i=j-ione; i>-ione; i--) {
00631     ix -= incx;
00632     temp += A[j*lda + i]*x[ix];
00633         }
00634         x[jx] = temp;
00635         jx -= incx;
00636       }
00637     }
00638   } else {
00639     /* A is a lower triangular matrix */
00640     if (incx == ione) {
00641       for (j=izero; j<n; j++) {
00642         temp = x[j];
00643         if (NoUnit)
00644     temp *= A[j*lda + j];
00645         for (i=j+ione; i<n; i++) {
00646     temp += A[j*lda + i]*x[i];
00647         }
00648         x[j] = temp;
00649       }
00650     } else {
00651       jx = kx;
00652       for (j=izero; j<n; j++) {
00653         temp = x[jx];
00654         ix = jx;
00655         if (NoUnit) 
00656     temp *= A[j*lda + j];
00657         for (i=j+ione; i<n; i++) {
00658     ix += incx;
00659     temp += A[j*lda + i]*x[ix];
00660         }
00661         x[jx] = temp;
00662         jx += incx;       
00663       }
00664     }
00665   } /* if (EUploChar[uplo]=='U') */
00666       } /* if (ETranspChar[trans]=='N') */
00667     } /* if (!BadArgument) */
00668   } /* end TRMV */
00669         
00670   template<typename OrdinalType, typename ScalarType>
00671   void BLAS<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* x, const OrdinalType incx, const ScalarType* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
00672   {
00673     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00674     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00675     ScalarType zero = ScalarTraits<ScalarType>::zero();
00676     ScalarType one = ScalarTraits<ScalarType>::one();
00677     bool BadArgument = false;
00678 
00679     // Quick return if there is nothing to do!
00680     if( m == izero || n == izero || alpha == zero ){ return; }
00681     
00682     // Otherwise, we need to check the argument list.
00683     if( m < izero ) { 
00684   cout << "BLAS::GER Error: M == " << m << endl;      
00685   BadArgument = true;
00686     }
00687     if( n < izero ) { 
00688   cout << "BLAS::GER Error: N == " << n << endl;      
00689   BadArgument = true;
00690     }
00691     if( lda < m ) { 
00692   cout << "BLAS::GER Error: LDA < MAX(1,M)"<< endl;     
00693   BadArgument = true;
00694     }
00695     if( incx == 0 ) {
00696   cout << "BLAS::GER Error: INCX == 0"<< endl;
00697   BadArgument = true;
00698     }
00699     if( incy == 0 ) {
00700   cout << "BLAS::GER Error: INCY == 0"<< endl;
00701   BadArgument = true;
00702     }
00703 
00704     if(!BadArgument) {
00705       OrdinalType i, j, ix, jy = izero, kx = izero;
00706       ScalarType temp;
00707 
00708       // Set the starting pointers for the vectors x and y if incx/y < 0.
00709       if (incx < izero) { kx = (-m+ione)*incx; }
00710       if (incy < izero) { jy = (-n+ione)*incy; }
00711 
00712       // Start the operations for incx == 1
00713       if( incx == ione ) {
00714   for( j=izero; j<n; j++ ) {
00715     if ( y[jy] != zero ) {
00716       temp = alpha*y[jy];
00717       for ( i=izero; i<m; i++ ) {
00718         A[j*lda + i] += x[i]*temp;
00719       }
00720     }
00721     jy += incy;
00722   }
00723       } 
00724       else { // Start the operations for incx != 1
00725   for( j=izero; j<n; j++ ) {
00726     if ( y[jy] != zero ) {
00727       temp = alpha*y[jy];
00728       ix = kx;
00729       for( i=izero; i<m; i++ ) {
00730         A[j*lda + i] += x[ix]*temp;
00731         ix += incx;
00732       }
00733     }
00734     jy += incy;
00735   }
00736       }
00737     } /* if(!BadArgument) */
00738   } /* end GER */
00739   
00740 //------------------------------------------------------------------------------------------
00741 //      LEVEL 3 BLAS ROUTINES
00742 //------------------------------------------------------------------------------------------
00743         
00744   template<typename OrdinalType, typename ScalarType>
00745   void BLAS<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
00746   {
00747     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00748     ScalarType zero = ScalarTraits<ScalarType>::zero();
00749     ScalarType one = ScalarTraits<ScalarType>::one();
00750     OrdinalType i, j, p;
00751     OrdinalType NRowA = m, NRowB = k;
00752     ScalarType temp;
00753     bool BadArgument = false;
00754 
00755     // Change dimensions of matrix if either matrix is transposed
00756     if( !(ETranspChar[transa]=='N') ) {
00757       NRowA = k;
00758     }
00759     if( !(ETranspChar[transb]=='N') ) {
00760       NRowB = n;
00761     }
00762 
00763     // Quick return if there is nothing to do!
00764     if( (m==izero) || (n==izero) || (((alpha==zero)||(k==izero)) && (beta==one)) ){ return; }
00765     if( m < izero ) { 
00766       cout << "BLAS::GEMM Error: M == " << m << endl;     
00767       BadArgument = true;
00768     }
00769     if( n < izero ) { 
00770       cout << "BLAS::GEMM Error: N == " << n << endl;     
00771       BadArgument = true;
00772     }
00773     if( k < izero ) { 
00774       cout << "BLAS::GEMM Error: K == " << k << endl;     
00775       BadArgument = true;
00776     }
00777     if( lda < NRowA ) { 
00778       cout << "BLAS::GEMM Error: LDA < MAX(1,M)"<< endl;      
00779       BadArgument = true;
00780     }
00781     if( ldb < NRowB ) { 
00782       cout << "BLAS::GEMM Error: LDB < MAX(1,K)"<< endl;      
00783       BadArgument = true;
00784     }
00785      if( ldc < m ) { 
00786       cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< endl;      
00787       BadArgument = true;
00788     }
00789 
00790     if(!BadArgument) {
00791 
00792       // Only need to scale the resulting matrix C.
00793       if( alpha == zero ) {
00794   if( beta == zero ) {
00795     for (j=izero; j<n; j++) {
00796       for (i=izero; i<m; i++) {
00797         C[j*ldc + i] = zero;
00798       }
00799     }
00800   } else {
00801     for (j=izero; j<n; j++) {
00802       for (i=izero; i<m; i++) {
00803         C[j*ldc + i] *= beta;
00804       }
00805     }
00806   }
00807   return;
00808       }
00809       //
00810       // Now start the operations.
00811       //
00812       if ( ETranspChar[transb]=='N' ) {
00813   if ( ETranspChar[transa]=='N' ) {
00814     // Compute C = alpha*A*B + beta*C
00815     for (j=izero; j<n; j++) {
00816       if( beta == zero ) {
00817         for (i=izero; i<m; i++){
00818     C[j*ldc + i] = zero;
00819         }
00820       } else if( beta != one ) {
00821         for (i=izero; i<m; i++){
00822     C[j*ldc + i] *= beta;
00823         }
00824       }
00825       for (p=izero; p<k; p++){
00826         if (B[j*ldb + p] != zero ){
00827     temp = alpha*B[j*ldb + p];
00828     for (i=izero; i<m; i++) {
00829       C[j*ldc + i] += temp*A[p*lda + i];
00830     }
00831         }
00832       }
00833     }
00834   } else {
00835     // Compute C = alpha*A'*B + beta*C
00836     for (j=izero; j<n; j++) {
00837       for (i=izero; i<m; i++) {
00838         temp = zero;
00839         for (p=izero; p<k; p++) {
00840     temp += A[i*lda + p]*B[j*ldb + p];
00841         }
00842         if (beta == zero) {
00843     C[j*ldc + i] = alpha*temp;
00844         } else {
00845     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00846         }
00847       }
00848     }
00849   }
00850       } else {
00851   if ( ETranspChar[transa]=='N' ) {
00852     // Compute C = alpha*A*B' + beta*C
00853     for (j=izero; j<n; j++) {
00854       if (beta == zero) {
00855         for (i=izero; i<m; i++) {
00856     C[j*ldc + i] = zero;
00857         } 
00858       } else if ( beta != one ) {
00859         for (i=izero; i<m; i++) {
00860     C[j*ldc + i] *= beta;
00861         }
00862       }
00863       for (p=izero; p<k; p++) {
00864         if (B[p*ldb + j] != zero) {
00865     temp = alpha*B[p*ldb + j];
00866     for (i=izero; i<m; i++) {
00867       C[j*ldc + i] += temp*A[p*lda + i];
00868     }
00869         }
00870       }
00871     }
00872   } else {
00873     // Compute C += alpha*A'*B' + beta*C
00874     for (j=izero; j<n; j++) {
00875       for (i=izero; i<m; i++) {
00876         temp = zero;
00877         for (p=izero; p<k; p++) {
00878     temp += A[i*lda + p]*B[p*ldb + j];
00879         }
00880         if (beta == zero) {
00881     C[j*ldc + i] = alpha*temp;
00882         } else {
00883     C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00884         }
00885       }
00886     }
00887   } // end if (ETranspChar[transa]=='N') ...
00888       } // end if (ETranspChar[transb]=='N') ...
00889     } // end if (!BadArgument) ...
00890   } // end of GEMM
00891 
00892 
00893   template<typename OrdinalType, typename ScalarType>
00894   void BLAS<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, const ScalarType* B, const OrdinalType ldb, const ScalarType beta, ScalarType* C, const OrdinalType ldc) const
00895   {
00896     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00897     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00898     ScalarType zero = ScalarTraits<ScalarType>::zero();
00899     ScalarType one = ScalarTraits<ScalarType>::one();
00900     OrdinalType i, j, k, NRowA = m;
00901     ScalarType temp1, temp2;
00902     bool BadArgument = false;
00903     bool Upper = (EUploChar[uplo] == 'U');
00904     if (ESideChar[side] == 'R') { NRowA = n; }
00905     
00906     // Quick return.
00907     if ( (m==izero) || (n==izero) || ( (alpha==zero)&&(beta==one) ) ) { return; }
00908     if( m < 0 ) { 
00909       cout << "BLAS::SYMM Error: M == "<< m << endl;
00910       BadArgument = true; }
00911     if( n < 0 ) {
00912       cout << "BLAS::SYMM Error: N == "<< n << endl;
00913       BadArgument = true; }
00914     if( lda < NRowA ) {
00915       cout << "BLAS::SYMM Error: LDA == "<<lda<<endl;
00916       BadArgument = true; }
00917     if( ldb < m ) {
00918       cout << "BLAS::SYMM Error: LDB == "<<ldb<<endl;
00919       BadArgument = true; }
00920     if( ldc < m ) {
00921       cout << "BLAS::SYMM Error: LDC == "<<ldc<<endl;
00922       BadArgument = true; }
00923 
00924     if(!BadArgument) {
00925 
00926       // Only need to scale C and return.
00927       if (alpha == zero) {
00928   if (beta == zero ) {
00929     for (j=izero; j<n; j++) {
00930       for (i=izero; i<m; i++) {
00931         C[j*ldc + i] = zero;
00932       }
00933     }
00934   } else {
00935     for (j=izero; j<n; j++) {
00936       for (i=izero; i<m; i++) {
00937         C[j*ldc + i] *= beta;
00938       }
00939     }
00940   }
00941   return;
00942       }
00943 
00944       if ( ESideChar[side] == 'L') {
00945   // Compute C = alpha*A*B + beta*C
00946 
00947   if (Upper) {
00948     // The symmetric part of A is stored in the upper triangular part of the matrix.
00949     for (j=izero; j<n; j++) {
00950       for (i=izero; i<m; i++) {
00951         temp1 = alpha*B[j*ldb + i];
00952         temp2 = zero;
00953         for (k=izero; k<i; k++) {
00954     C[j*ldc + k] += temp1*A[i*lda + k];
00955     temp2 += B[j*ldb + k]*A[i*lda + k];
00956         }
00957         if (beta == zero) {
00958     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
00959         } else {
00960     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
00961         }
00962       }
00963     }
00964   } else {
00965     // The symmetric part of A is stored in the lower triangular part of the matrix.
00966     for (j=izero; j<n; j++) {
00967       for (i=m-ione; i>-ione; i--) {
00968         temp1 = alpha*B[j*ldb + i];
00969         temp2 = zero;
00970         for (k=i+ione; k<m; k++) {
00971     C[j*ldc + k] += temp1*A[i*lda + k];
00972     temp2 += B[j*ldb + k]*A[i*lda + k];
00973         }
00974         if (beta == zero) {
00975     C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
00976         } else {
00977     C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
00978         }
00979       }
00980     }
00981   }
00982       } else {
00983   // Compute C = alpha*B*A + beta*C.
00984   for (j=izero; j<n; j++) {
00985     temp1 = alpha*A[j*lda + j];
00986     if (beta == zero) {
00987       for (i=izero; i<m; i++) {
00988         C[j*ldc + i] = temp1*B[j*ldb + i];
00989       }
00990     } else {
00991       for (i=izero; i<m; i++) {
00992         C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
00993       }
00994     }
00995     for (k=izero; k<j; k++) {
00996       if (Upper) {
00997         temp1 = alpha*A[j*lda + k];
00998       } else {
00999         temp1 = alpha*A[k*lda + j];
01000       }
01001       for (i=izero; i<m; i++) {
01002         C[j*ldc + i] += temp1*B[k*ldb + i];
01003       }
01004     }
01005     for (k=j+ione; k<n; k++) {
01006       if (Upper) {
01007         temp1 = alpha*A[k*lda + j];
01008       } else {
01009         temp1 = alpha*A[j*lda + k];
01010       }
01011       for (i=izero; i<m; i++) {
01012         C[j*ldc + i] += temp1*B[k*ldb + i];
01013       }
01014     }
01015   }
01016       } // end if (ESideChar[side]=='L') ...
01017     } // end if(!BadArgument) ...
01018 } // end SYMM
01019   
01020   template<typename OrdinalType, typename ScalarType>
01021   void BLAS<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01022   {
01023     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01024     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01025     ScalarType zero = ScalarTraits<ScalarType>::zero();
01026     ScalarType one = ScalarTraits<ScalarType>::one();
01027     OrdinalType i, j, k, NRowA = m;
01028     ScalarType temp;
01029     bool BadArgument = false;
01030     bool LSide = (ESideChar[side] == 'L');
01031     bool NoUnit = (EDiagChar[diag] == 'N');
01032     bool Upper = (EUploChar[uplo] == 'U');
01033 
01034     if(!LSide) { NRowA = n; }
01035 
01036     // Quick return.
01037     if (n==izero || m==izero) { return; }
01038     if( m < 0 ) {
01039       cout << "BLAS::TRMM Error: M == "<< m <<endl;
01040       BadArgument = true; }
01041     if( n < 0 ) {
01042       cout << "BLAS::TRMM Error: N == "<< n <<endl;
01043       BadArgument = true; }
01044     if( lda < NRowA ) {
01045       cout << "BLAS::TRMM Error: LDA == "<< lda << endl;
01046       BadArgument = true; }
01047     if( ldb < m ) {
01048       cout << "BLAS::TRMM Error: M == "<< ldb << endl;
01049       BadArgument = true; }
01050 
01051     if(!BadArgument) {
01052 
01053       // B only needs to be zeroed out.
01054       if( alpha == zero ) {
01055   for( j=izero; j<n; j++ ) {
01056     for( i=izero; i<m; i++ ) {
01057       B[j*ldb + i] = zero;
01058     }
01059   }
01060   return;
01061       }
01062       
01063       //  Start the computations. 
01064       if ( LSide ) {
01065   // A is on the left side of B.
01066   
01067   if ( ETranspChar[transa]=='N' ) {
01068     // Compute B = alpha*A*B
01069 
01070     if ( Upper ) {
01071       // A is upper triangular
01072       for( j=izero; j<n; j++ ) {
01073         for( k=izero; k<m; k++) {
01074     if ( B[j*ldb + k] != zero ) {
01075       temp = alpha*B[j*ldb + k];
01076       for( i=izero; i<k; i++ ) {
01077         B[j*ldb + i] += temp*A[k*lda + i];
01078       }
01079       if ( NoUnit )
01080         temp *=A[k*lda + k];
01081       B[j*ldb + k] = temp;
01082     }
01083         }
01084       }
01085     } else {
01086       // A is lower triangular
01087       for( j=izero; j<n; j++ ) {
01088         for( k=m-ione; k>-ione; k-- ) {
01089     if( B[j*ldb + k] != zero ) {
01090       temp = alpha*B[j*ldb + k];
01091       B[j*ldb + k] = temp;
01092       if ( NoUnit )
01093         B[j*ldb + k] *= A[k*lda + k];
01094       for( i=k+ione; i<m; i++ ) {
01095         B[j*ldb + i] += temp*A[k*lda + i];
01096       }
01097     }
01098         }
01099       }
01100     }
01101   } else {
01102     // Compute B = alpha*A'*B
01103     if( Upper ) {
01104       for( j=izero; j<n; j++ ) {
01105         for( i=m-ione; i>-ione; i-- ) {
01106     temp = B[j*ldb + i];
01107     if( NoUnit )
01108       temp *= A[i*lda + i];
01109     for( k=izero; k<i; k++ ) {
01110       temp += A[i*lda + k]*B[j*ldb + k];
01111     }
01112     B[j*ldb + i] = alpha*temp;
01113         }
01114       }
01115     } else {
01116       for( j=izero; j<n; j++ ) {
01117         for( i=izero; i<m; i++ ) {
01118     temp = B[j*ldb + i];
01119     if( NoUnit ) 
01120       temp *= A[i*lda + i];
01121     for( k=i+ione; k<m; k++ ) {
01122       temp += A[i*lda + k]*B[j*ldb + k];
01123     }
01124     B[j*ldb + i] = alpha*temp;
01125         }
01126       }
01127     }
01128   }
01129       } else {
01130   // A is on the right hand side of B.
01131   
01132   if( ETranspChar[transa] == 'N' ) {
01133     // Compute B = alpha*B*A
01134 
01135     if( Upper ) {
01136       // A is upper triangular.
01137       for( j=n-ione; j>-ione; j-- ) {
01138         temp = alpha;
01139         if( NoUnit )
01140     temp *= A[j*lda + j];
01141         for( i=izero; i<m; i++ ) {
01142     B[j*ldb + i] *= temp;
01143         }
01144         for( k=izero; k<j; k++ ) {
01145     if( A[j*lda + k] != zero ) {
01146       temp = alpha*A[j*lda + k];
01147       for( i=izero; i<m; i++ ) {
01148         B[j*ldb + i] += temp*B[k*ldb + i];
01149       }
01150     }
01151         }
01152       }
01153     } else {
01154       // A is lower triangular.
01155       for( j=izero; j<n; j++ ) {
01156         temp = alpha;
01157         if( NoUnit )
01158     temp *= A[j*lda + j];
01159         for( i=izero; i<m; i++ ) {
01160     B[j*ldb + i] *= temp;
01161         }
01162         for( k=j+ione; k<n; k++ ) {
01163     if( A[j*lda + k] != zero ) {
01164       temp = alpha*A[j*lda + k];
01165       for( i=izero; i<m; i++ ) {
01166         B[j*ldb + i] += temp*B[k*ldb + i];
01167       }
01168     }
01169         }
01170       }
01171     }
01172   } else {
01173     // Compute B = alpha*B*A'
01174 
01175     if( Upper ) {
01176       for( k=izero; k<n; k++ ) {
01177         for( j=izero; j<k; j++ ) {
01178     if( A[k*lda + j] != zero ) {
01179       temp = alpha*A[k*lda + j];
01180       for( i=izero; i<m; i++ ) {
01181         B[j*ldb + i] += temp*B[k*ldb + i];
01182       }
01183     }
01184         }
01185         temp = alpha;
01186         if( NoUnit ) 
01187     temp *= A[k*lda + k];
01188         if( temp != one ) {
01189     for( i=izero; i<m; i++ ) {
01190       B[k*ldb + i] *= temp;
01191     }
01192         }
01193       }
01194     } else {
01195       for( k=n-ione; k>-ione; k-- ) {
01196         for( j=k+ione; j<n; j++ ) {
01197     if( A[k*lda + j] != zero ) {
01198       temp = alpha*A[k*lda + j];
01199       for( i=izero; i<m; i++ ) {
01200         B[j*ldb + i] += temp*B[k*ldb + i];
01201       }
01202     }
01203         }
01204         temp = alpha;
01205         if( NoUnit )
01206     temp *= A[k*lda + k];
01207         if( temp != one ) {
01208     for( i=izero; i<m; i++ ) {
01209       B[k*ldb + i] *= temp;
01210     }
01211         }
01212       }
01213     }
01214   } // end if( ETranspChar[transa] == 'N' ) ...
01215       } // end if ( LSide ) ...
01216     } // end if (!BadArgument)
01217   } // end TRMM
01218   
01219   template<typename OrdinalType, typename ScalarType>
01220   void BLAS<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const ScalarType alpha, const ScalarType* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01221   {
01222     OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01223     OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01224     ScalarType zero = ScalarTraits<ScalarType>::zero();
01225     ScalarType one = ScalarTraits<ScalarType>::one();
01226     ScalarType temp;
01227     OrdinalType NRowA = m;
01228     bool BadArgument = false;
01229     bool NoUnit = (EDiagChar[diag]=='N');
01230     
01231     if (!(ESideChar[side] == 'L')) { NRowA = n; }
01232 
01233     // Quick return.
01234     if (n == izero || m == izero) { return; }
01235     if( m < izero ) {
01236       cout << "BLAS::TRSM Error: M == "<<m<<endl;
01237       BadArgument = true; }
01238     if( n < izero ) {
01239       cout << "BLAS::TRSM Error: N == "<<n<<endl;
01240       BadArgument = true; }
01241     if( lda < NRowA ) {
01242       cout << "BLAS::TRSM Error: LDA == "<<lda<<endl;
01243       BadArgument = true; }
01244     if( ldb < m ) {
01245       cout << "BLAS::TRSM Error: LDB == "<<ldb<<endl;
01246       BadArgument = true; }
01247 
01248     if(!BadArgument)
01249       {
01250   int i, j, k;
01251   // Set the solution to the zero vector.
01252   if(alpha == zero) {
01253       for(j = izero; j < n; j++) {
01254         for( i = izero; i < m; i++) {
01255         B[j*ldb+i] = zero;
01256           }
01257       }
01258   }
01259   else 
01260   { // Start the operations.
01261       if(ESideChar[side] == 'L') {
01262     //
01263         // Perform computations for OP(A)*X = alpha*B     
01264     //
01265     if(ETranspChar[transa] == 'N') {
01266         //
01267         //  Compute B = alpha*inv( A )*B
01268         //
01269         if(EUploChar[uplo] == 'U') { 
01270       // A is upper triangular.
01271       for(j = izero; j < n; j++) {
01272               // Perform alpha*B if alpha is not 1.
01273               if(alpha != one) {
01274                 for( i = izero; i < m; i++) {
01275                 B[j*ldb+i] *= alpha;
01276             }
01277           }
01278           // Perform a backsolve for column j of B.
01279           for(k = (m - ione); k > -ione; k--) {
01280         // If this entry is zero, we don't have to do anything.
01281         if (B[j*ldb + k] != zero) {
01282             if (NoUnit) {
01283           B[j*ldb + k] /= A[k*lda + k];
01284             }
01285             for(i = izero; i < k; i++) {
01286           B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01287             }
01288         }
01289           }
01290       }
01291         }
01292         else 
01293         { // A is lower triangular.
01294                         for(j = izero; j < n; j++) {
01295                             // Perform alpha*B if alpha is not 1.
01296                             if(alpha != one) {
01297                                 for( i = izero; i < m; i++) {
01298                                     B[j*ldb+i] *= alpha;
01299                                 }
01300                             }
01301                             // Perform a forward solve for column j of B.
01302                             for(k = izero; k < m; k++) {
01303                                 // If this entry is zero, we don't have to do anything.
01304                                 if (B[j*ldb + k] != zero) {   
01305                                     if (NoUnit) {
01306                                         B[j*ldb + k] /= A[k*lda + k];
01307                                     }
01308                                     for(i = k+ione; i < m; i++) {
01309                                         B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01310                                     }
01311                                 }
01312                             }
01313                         }
01314         } // end if (uplo == 'U')
01315     }  // if (transa =='N') 
01316         else { 
01317         //
01318         //  Compute B = alpha*inv( A' )*B
01319         //
01320         if(EUploChar[uplo] == 'U') { 
01321       // A is upper triangular.
01322       for(j = izero; j < n; j++) {
01323                   for( i = izero; i < m; i++) {
01324             temp = alpha*B[j*ldb+i];
01325             for(k = izero; k < i; k++) {
01326             temp -= A[i*lda + k] * B[j*ldb + k];
01327         }
01328         if (NoUnit) {
01329             temp /= A[i*lda + i];
01330         }
01331         B[j*ldb + i] = temp;
01332           }
01333       }
01334         }
01335         else
01336         { // A is lower triangular.
01337                         for(j = izero; j < n; j++) {
01338                             for(i = (m - ione) ; i > -ione; i--) {
01339                                 temp = alpha*B[j*ldb+i];
01340                               for(k = i+ione; k < m; k++) {
01341             temp -= A[i*lda + k] * B[j*ldb + k];
01342         }
01343         if (NoUnit) {
01344             temp /= A[i*lda + i];
01345         }
01346         B[j*ldb + i] = temp;
01347                             }
01348                         }
01349         }
01350     }
01351       }  // if (side == 'L')
01352       else { 
01353          // side == 'R'
01354          //
01355          // Perform computations for X*OP(A) = alpha*B      
01356          //
01357         if (ETranspChar[transa] == 'N') {
01358         //
01359         //  Compute B = alpha*B*inv( A )
01360         //
01361         if(EUploChar[uplo] == 'U') { 
01362       // A is upper triangular.
01363           // Perform a backsolve for column j of B.
01364       for(j = izero; j < n; j++) {
01365               // Perform alpha*B if alpha is not 1.
01366               if(alpha != one) {
01367                 for( i = izero; i < m; i++) {
01368                 B[j*ldb+i] *= alpha;
01369             }
01370           }
01371           for(k = izero; k < j; k++) {
01372         // If this entry is zero, we don't have to do anything.
01373         if (A[j*lda + k] != zero) {
01374             for(i = izero; i < m; i++) {
01375           B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01376             }
01377         }
01378           }
01379           if (NoUnit) {
01380         temp = one/A[j*lda + j];
01381         for(i = izero; i < m; i++) {
01382             B[j*ldb + i] *= temp;
01383         }
01384           }
01385       }
01386         }
01387         else 
01388         { // A is lower triangular.
01389                         for(j = (n - ione); j > -ione; j--) {
01390                             // Perform alpha*B if alpha is not 1.
01391                             if(alpha != one) {
01392                                 for( i = izero; i < m; i++) {
01393                                     B[j*ldb+i] *= alpha;
01394                                 }
01395                             }
01396                             // Perform a forward solve for column j of B.
01397                             for(k = j+ione; k < n; k++) {
01398                                 // If this entry is zero, we don't have to do anything.
01399         if (A[j*lda + k] != zero) {
01400             for(i = izero; i < m; i++) {
01401                                         B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i]; 
01402                                     }
01403                                 } 
01404                             }
01405           if (NoUnit) {
01406         temp = one/A[j*lda + j];
01407         for(i = izero; i < m; i++) {
01408             B[j*ldb + i] *= temp;
01409         }
01410           }     
01411                         }
01412         } // end if (uplo == 'U')
01413     }  // if (transa =='N') 
01414         else { 
01415         //
01416         //  Compute B = alpha*B*inv( A' )
01417         //
01418         if(EUploChar[uplo] == 'U') { 
01419       // A is upper triangular.
01420       for(k = (n - ione); k > -ione; k--) {
01421           if (NoUnit) {
01422         temp = one/A[k*lda + k];
01423                     for(i = izero; i < m; i++) {
01424                 B[k*ldb + i] *= temp;
01425         }
01426           }
01427           for(j = izero; j < k; j++) {
01428         if (A[k*lda + j] != zero) {
01429             temp = A[k*lda + j];
01430             for(i = izero; i < m; i++) {
01431           B[j*ldb + i] -= temp*B[k*ldb + i];
01432             }
01433         }
01434           }
01435           if (alpha != one) {
01436         for (i = izero; i < m; i++) {
01437             B[k*ldb + i] *= alpha;
01438         }
01439           }
01440       }
01441         }
01442         else
01443         { // A is lower triangular.
01444       for(k = izero; k < n; k++) {
01445           if (NoUnit) {
01446         temp = one/A[k*lda + k];
01447         for (i = izero; i < m; i++) {
01448             B[k*ldb + i] *= temp;
01449         }
01450           }
01451           for(j = k+ione; j < n; j++) {
01452         if(A[k*lda + j] != zero) {
01453             temp = A[k*lda + j];
01454             for(i = izero; i < m; i++) {
01455           B[j*ldb + i] -= temp*B[k*ldb + i];
01456             }
01457         }
01458           }
01459           if (alpha != one) {
01460         for (i = izero; i < m; i++) {
01461             B[k*ldb + i] *= alpha;
01462         }
01463           }
01464                         }
01465         }
01466     }   
01467       }
01468   }
01469     }
01470   }
01471   
01472 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01473 
01474   template<typename OrdinalType>
01475   class BLAS<OrdinalType, float>
01476   {    
01477   public:
01478     inline BLAS(void) {};
01479     inline BLAS(const BLAS<OrdinalType, float>& BLAS_source) {};
01480     inline virtual ~BLAS(void) {};
01481     void ROTG(float* da, float* db, float* c, float* s) const;
01482     float ASUM(const OrdinalType n, const float* x, const OrdinalType incx) const;
01483     void AXPY(const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const;
01484     void COPY(const OrdinalType n, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const;
01485     float DOT(const OrdinalType n, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy) const;
01486     float NRM2(const OrdinalType n, const float* x, const OrdinalType incx) const;
01487     void SCAL(const OrdinalType n, const float alpha, float* x, const OrdinalType incx) const;
01488     OrdinalType IAMAX(const OrdinalType n, const float* x, const OrdinalType incx) const;
01489     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* x, const OrdinalType incx, const float beta, float* y, const OrdinalType incy) const;
01490     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const float* A, const OrdinalType lda, float* x, const OrdinalType incx) const;
01491     void GER(const OrdinalType m, const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy, float* A, const OrdinalType lda) const;
01492     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const;
01493     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float *B, const OrdinalType ldb, const float beta, float *C, const OrdinalType ldc) const;
01494     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const;
01495     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const;
01496   };
01497 
01498   template<typename OrdinalType>
01499   void BLAS<OrdinalType, float>::ROTG(float* da, float* db, float* c, float* s) const
01500   { SROTG_F77(da, db, c, s ); }
01501 
01502   template<typename OrdinalType>
01503   float BLAS<OrdinalType, float>::ASUM(const OrdinalType n, const float* x, const OrdinalType incx) const
01504   { return SASUM_F77(&n, x, &incx); }
01505   
01506   template<typename OrdinalType>
01507   void BLAS<OrdinalType, float>::AXPY(const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const
01508   { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01509   
01510   template<typename OrdinalType>
01511   void BLAS<OrdinalType, float>::COPY(const OrdinalType n, const float* x, const OrdinalType incx, float* y, const OrdinalType incy) const 
01512   { SCOPY_F77(&n, x, &incx, y, &incy); }
01513   
01514   template<typename OrdinalType>
01515   float BLAS<OrdinalType, float>::DOT(const OrdinalType n, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy) const
01516   { return SDOT_F77(&n, x, &incx, y, &incy); }
01517   
01518   template<typename OrdinalType>
01519   OrdinalType BLAS<OrdinalType, float>::IAMAX(const OrdinalType n, const float* x, const OrdinalType incx) const
01520   { return ISAMAX_F77(&n, x, &incx); }
01521 
01522   template<typename OrdinalType>
01523   float BLAS<OrdinalType, float>::NRM2(const OrdinalType n, const float* x, const OrdinalType incx) const
01524   { return SNRM2_F77(&n, x, &incx); }
01525   
01526   template<typename OrdinalType>
01527   void BLAS<OrdinalType, float>::SCAL(const OrdinalType n, const float alpha, float* x, const OrdinalType incx) const
01528   { SSCAL_F77(&n, &alpha, x, &incx); }
01529   
01530   template<typename OrdinalType>
01531   void BLAS<OrdinalType, float>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* x, const OrdinalType incx, const float beta, float* y, const OrdinalType incy) const
01532   { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01533   
01534   template<typename OrdinalType>
01535   void BLAS<OrdinalType, float>::GER(const OrdinalType m, const OrdinalType n, const float alpha, const float* x, const OrdinalType incx, const float* y, const OrdinalType incy, float* A, const OrdinalType lda) const
01536   { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01537 
01538   template<typename OrdinalType>
01539   void BLAS<OrdinalType, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const float* A, const OrdinalType lda, float* x, const OrdinalType incx) const
01540   { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01541   
01542   template<typename OrdinalType>
01543   void BLAS<OrdinalType, float>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const
01544   { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01545   
01546   template<typename OrdinalType>
01547   void BLAS<OrdinalType, float>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, const float* B, const OrdinalType ldb, const float beta, float* C, const OrdinalType ldc) const
01548   { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01549   
01550   template<typename OrdinalType>
01551   void BLAS<OrdinalType, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const
01552   { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01553   
01554   template<typename OrdinalType>
01555   void BLAS<OrdinalType, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const float alpha, const float* A, const OrdinalType lda, float* B, const OrdinalType ldb) const
01556   { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01557 
01558 
01559   template<typename OrdinalType>
01560   class BLAS<OrdinalType, double>
01561   {    
01562   public:
01563     inline BLAS(void) {};
01564     inline BLAS(const BLAS<OrdinalType, double>& BLAS_source) {};
01565     inline virtual ~BLAS(void) {};
01566     void ROTG(double* da, double* db, double* c, double* s) const;
01567     double ASUM(const OrdinalType n, const double* x, const OrdinalType incx) const;
01568     void AXPY(const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const;
01569     void COPY(const OrdinalType n, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const;
01570     double DOT(const OrdinalType n, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy) const;
01571     double NRM2(const OrdinalType n, const double* x, const OrdinalType incx) const;
01572     void SCAL(const OrdinalType n, const double alpha, double* x, const OrdinalType incx) const;
01573     OrdinalType IAMAX(const OrdinalType n, const double* x, const OrdinalType incx) const;
01574     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double* x, const OrdinalType incx, const double beta, double* y, const OrdinalType incy) const;
01575     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const double* A, const OrdinalType lda, double* x, const OrdinalType incx) const;
01576     void GER(const OrdinalType m, const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy, double* A, const OrdinalType lda) const;
01577     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const double alpha, const double* A, const OrdinalType lda, const double* B, const OrdinalType ldb, const double beta, double* C, const OrdinalType ldc) const;
01578     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double *B, const OrdinalType ldb, const double beta, double *C, const OrdinalType ldc) const;
01579     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const;
01580     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const;
01581   };
01582   
01583   template<typename OrdinalType>
01584   void BLAS<OrdinalType, double>::ROTG(double* da, double* db, double* c, double* s) const
01585   { DROTG_F77(da, db, c, s); }
01586 
01587   template<typename OrdinalType>
01588   double BLAS<OrdinalType, double>::ASUM(const OrdinalType n, const double* x, const OrdinalType incx) const
01589   { return DASUM_F77(&n, x, &incx); }
01590   
01591   template<typename OrdinalType>
01592   void BLAS<OrdinalType, double>::AXPY(const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const
01593   { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01594   
01595   template<typename OrdinalType>
01596   void BLAS<OrdinalType, double>::COPY(const OrdinalType n, const double* x, const OrdinalType incx, double* y, const OrdinalType incy) const
01597   { DCOPY_F77(&n, x, &incx, y, &incy); }
01598   
01599   template<typename OrdinalType>
01600   double BLAS<OrdinalType, double>::DOT(const OrdinalType n, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy) const
01601   { return DDOT_F77(&n, x, &incx, y, &incy); }
01602   
01603   template<typename OrdinalType>
01604   OrdinalType BLAS<OrdinalType, double>::IAMAX(const OrdinalType n, const double* x, const OrdinalType incx) const
01605   { return IDAMAX_F77(&n, x, &incx); }
01606 
01607   template<typename OrdinalType>
01608   double BLAS<OrdinalType, double>::NRM2(const OrdinalType n, const double* x, const OrdinalType incx) const
01609   { return DNRM2_F77(&n, x, &incx); }
01610   
01611   template<typename OrdinalType>
01612   void BLAS<OrdinalType, double>::SCAL(const OrdinalType n, const double alpha, double* x, const OrdinalType incx) const
01613   { DSCAL_F77(&n, &alpha, x, &incx); }
01614   
01615   template<typename OrdinalType>
01616   void BLAS<OrdinalType, double>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double* x, const OrdinalType incx, const double beta, double* y, const OrdinalType incy) const
01617   { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01618   
01619   template<typename OrdinalType>
01620   void BLAS<OrdinalType, double>::GER(const OrdinalType m, const OrdinalType n, const double alpha, const double* x, const OrdinalType incx, const double* y, const OrdinalType incy, double* A, const OrdinalType lda) const
01621   { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01622 
01623   template<typename OrdinalType>
01624   void BLAS<OrdinalType, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const double* A, const OrdinalType lda, double* x, const OrdinalType incx) const
01625   { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01626   
01627   template<typename OrdinalType>
01628   void BLAS<OrdinalType, double>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const double alpha, const double* A, const OrdinalType lda, const double* B, const OrdinalType ldb, const double beta, double* C, const OrdinalType ldc) const
01629   { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01630   
01631   template<typename OrdinalType>
01632   void BLAS<OrdinalType, double>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, const double *B, const OrdinalType ldb, const double beta, double *C, const OrdinalType ldc) const
01633   { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01634   
01635   template<typename OrdinalType>
01636   void BLAS<OrdinalType, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const
01637   { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01638 
01639   template<typename OrdinalType>
01640   void BLAS<OrdinalType, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const double alpha, const double* A, const OrdinalType lda, double* B, const OrdinalType ldb) const
01641   { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01642   
01643 #ifdef HAVE_TEUCHOS_COMPLEX
01644 
01645   template<typename OrdinalType>
01646   class BLAS<OrdinalType, complex<float> >
01647   {    
01648   public:
01649     inline BLAS(void) {};
01650     inline BLAS(const BLAS<OrdinalType, complex<float> >& BLAS_source) {};
01651     inline virtual ~BLAS(void) {};
01652     void ROTG(complex<float>* da, complex<float>* db, float* c, complex<float>* s) const;
01653     float ASUM(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const;
01654     void AXPY(const OrdinalType n, const complex<float> alpha, const complex<float>* x, const OrdinalType incx, complex<float>* y, const OrdinalType incy) const;
01655     void COPY(const OrdinalType n, const complex<float>* x, const OrdinalType incx, complex<float>* y, const OrdinalType incy) const;
01656     complex<float> DOT(const OrdinalType n, const complex<float>* x, const OrdinalType incx, const complex<float>* y, const OrdinalType incy) const;
01657     float NRM2(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const;
01658     void SCAL(const OrdinalType n, const complex<float> alpha, complex<float>* x, const OrdinalType incx) const;
01659     OrdinalType IAMAX(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const;
01660     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float>* x, const OrdinalType incx, const complex<float> beta, complex<float>* y, const OrdinalType incy) const;
01661     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const complex<float>* A, const OrdinalType lda, complex<float>* x, const OrdinalType incx) const;
01662     void GER(const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* x, const OrdinalType incx, const complex<float>* y, const OrdinalType incy, complex<float>* A, const OrdinalType lda) const;
01663     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float>* B, const OrdinalType ldb, const complex<float> beta, complex<float>* C, const OrdinalType ldc) const;
01664     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float> *B, const OrdinalType ldb, const complex<float> beta, complex<float> *C, const OrdinalType ldc) const;
01665     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, complex<float>* B, const OrdinalType ldb) const;
01666     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, complex<float>* B, const OrdinalType ldb) const;
01667   };
01668 
01669   template<typename OrdinalType>
01670   void BLAS<OrdinalType, complex<float> >::ROTG(complex<float>* da, complex<float>* db, float* c, complex<float>* s) const
01671   { CROTG_F77(da, db, c, s ); }
01672 
01673   template<typename OrdinalType>
01674   float BLAS<OrdinalType, complex<float> >::ASUM(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const
01675   { return CASUM_F77(&n, x, &incx); }
01676   
01677   template<typename OrdinalType>
01678   void BLAS<OrdinalType, complex<float> >::AXPY(const OrdinalType n, const complex<float> alpha, const complex<float>* x, const OrdinalType incx, complex<float>* y, const OrdinalType incy) const
01679   { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01680   
01681   template<typename OrdinalType>
01682   void BLAS<OrdinalType, complex<float> >::COPY(const OrdinalType n, const complex<float>* x, const OrdinalType incx, complex<float>* y, const OrdinalType incy) const
01683   { CCOPY_F77(&n, x, &incx, y, &incy); }
01684   
01685   template<typename OrdinalType>
01686   complex<float> BLAS<OrdinalType, complex<float> >::DOT(const OrdinalType n, const complex<float>* x, const OrdinalType incx, const complex<float>* y, const OrdinalType incy) const
01687   { return CDOT_F77(&n, x, &incx, y, &incy); }
01688   
01689   template<typename OrdinalType>
01690   OrdinalType BLAS<OrdinalType, complex<float> >::IAMAX(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const
01691   { return ICAMAX_F77(&n, x, &incx); }
01692 
01693   template<typename OrdinalType>
01694   float BLAS<OrdinalType, complex<float> >::NRM2(const OrdinalType n, const complex<float>* x, const OrdinalType incx) const
01695   { return CNRM2_F77(&n, x, &incx); }
01696   
01697   template<typename OrdinalType>
01698   void BLAS<OrdinalType, complex<float> >::SCAL(const OrdinalType n, const complex<float> alpha, complex<float>* x, const OrdinalType incx) const
01699   { CSCAL_F77(&n, &alpha, x, &incx); }
01700   
01701   template<typename OrdinalType>
01702   void BLAS<OrdinalType, complex<float> >::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float>* x, const OrdinalType incx, const complex<float> beta, complex<float>* y, const OrdinalType incy) const
01703   { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01704   
01705   template<typename OrdinalType>
01706   void BLAS<OrdinalType, complex<float> >::GER(const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* x, const OrdinalType incx, const complex<float>* y, const OrdinalType incy, complex<float>* A, const OrdinalType lda) const
01707   { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01708 
01709   template<typename OrdinalType>
01710   void BLAS<OrdinalType, complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const complex<float>* A, const OrdinalType lda, complex<float>* x, const OrdinalType incx) const
01711   { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01712   
01713   template<typename OrdinalType>
01714   void BLAS<OrdinalType, complex<float> >::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float>* B, const OrdinalType ldb, const complex<float> beta, complex<float>* C, const OrdinalType ldc) const
01715   { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } 
01716  
01717   template<typename OrdinalType>
01718   void BLAS<OrdinalType, complex<float> >::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, const complex<float>* B, const OrdinalType ldb, const complex<float> beta, complex<float>* C, const OrdinalType ldc) const
01719   { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01720   
01721   template<typename OrdinalType>
01722   void BLAS<OrdinalType, complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, complex<float>* B, const OrdinalType ldb) const
01723   { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01724   
01725   template<typename OrdinalType>
01726   void BLAS<OrdinalType, complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<float> alpha, const complex<float>* A, const OrdinalType lda, complex<float>* B, const OrdinalType ldb) const
01727   { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01728 
01729 
01730   template<typename OrdinalType>
01731   class BLAS<OrdinalType, complex<double> >
01732   {    
01733   public:
01734     inline BLAS(void) {};
01735     inline BLAS(const BLAS<OrdinalType, complex<double> >& BLAS_source) {};
01736     inline virtual ~BLAS(void) {};
01737     void ROTG(complex<double>* da, complex<double>* db, double* c, complex<double>* s) const;
01738     double ASUM(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const;
01739     void AXPY(const OrdinalType n, const complex<double> alpha, const complex<double>* x, const OrdinalType incx, complex<double>* y, const OrdinalType incy) const;
01740     void COPY(const OrdinalType n, const complex<double>* x, const OrdinalType incx, complex<double>* y, const OrdinalType incy) const;
01741     complex<double> DOT(const OrdinalType n, const complex<double>* x, const OrdinalType incx, const complex<double>* y, const OrdinalType incy) const;
01742     double NRM2(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const;
01743     void SCAL(const OrdinalType n, const complex<double> alpha, complex<double>* x, const OrdinalType incx) const;
01744     OrdinalType IAMAX(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const;
01745     void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double>* x, const OrdinalType incx, const complex<double> beta, complex<double>* y, const OrdinalType incy) const;
01746     void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const complex<double>* A, const OrdinalType lda, complex<double>* x, const OrdinalType incx) const;
01747     void GER(const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* x, const OrdinalType incx, const complex<double>* y, const OrdinalType incy, complex<double>* A, const OrdinalType lda) const;
01748     void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double>* B, const OrdinalType ldb, const complex<double> beta, complex<double>* C, const OrdinalType ldc) const;
01749     void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double> *B, const OrdinalType ldb, const complex<double> beta, complex<double> *C, const OrdinalType ldc) const;
01750     void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, complex<double>* B, const OrdinalType ldb) const;
01751     void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, complex<double>* B, const OrdinalType ldb) const;
01752   };
01753   
01754   template<typename OrdinalType>
01755   void BLAS<OrdinalType, complex<double> >::ROTG(complex<double>* da, complex<double>* db, double* c, complex<double>* s) const
01756   { ZROTG_F77(da, db, c, s); }
01757 
01758   template<typename OrdinalType>
01759   double BLAS<OrdinalType, complex<double> >::ASUM(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const
01760   { return ZASUM_F77(&n, x, &incx); }
01761   
01762   template<typename OrdinalType>
01763   void BLAS<OrdinalType, complex<double> >::AXPY(const OrdinalType n, const complex<double> alpha, const complex<double>* x, const OrdinalType incx, complex<double>* y, const OrdinalType incy) const
01764   { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
01765   
01766   template<typename OrdinalType>
01767   void BLAS<OrdinalType, complex<double> >::COPY(const OrdinalType n, const complex<double>* x, const OrdinalType incx, complex<double>* y, const OrdinalType incy) const
01768   { ZCOPY_F77(&n, x, &incx, y, &incy); }
01769   
01770   template<typename OrdinalType>
01771   complex<double> BLAS<OrdinalType, complex<double> >::DOT(const OrdinalType n, const complex<double>* x, const OrdinalType incx, const complex<double>* y, const OrdinalType incy) const
01772   { return ZDOT_F77(&n, x, &incx, y, &incy); }
01773   
01774   template<typename OrdinalType>
01775   OrdinalType BLAS<OrdinalType, complex<double> >::IAMAX(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const
01776   { return IZAMAX_F77(&n, x, &incx); }
01777 
01778   template<typename OrdinalType>
01779   double BLAS<OrdinalType, complex<double> >::NRM2(const OrdinalType n, const complex<double>* x, const OrdinalType incx) const
01780   { return ZNRM2_F77(&n, x, &incx); }
01781   
01782   template<typename OrdinalType>
01783   void BLAS<OrdinalType, complex<double> >::SCAL(const OrdinalType n, const complex<double> alpha, complex<double>* x, const OrdinalType incx) const
01784   { ZSCAL_F77(&n, &alpha, x, &incx); }
01785   
01786   template<typename OrdinalType>
01787   void BLAS<OrdinalType, complex<double> >::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double>* x, const OrdinalType incx, const complex<double> beta, complex<double>* y, const OrdinalType incy) const
01788   { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
01789   
01790   template<typename OrdinalType>
01791   void BLAS<OrdinalType, complex<double> >::GER(const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* x, const OrdinalType incx, const complex<double>* y, const OrdinalType incy, complex<double>* A, const OrdinalType lda) const
01792   { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
01793 
01794   template<typename OrdinalType>
01795   void BLAS<OrdinalType, complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const complex<double>* A, const OrdinalType lda, complex<double>* x, const OrdinalType incx) const
01796   { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
01797   
01798   template<typename OrdinalType>
01799   void BLAS<OrdinalType, complex<double> >::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double>* B, const OrdinalType ldb, const complex<double> beta, complex<double>* C, const OrdinalType ldc) const
01800   { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01801   
01802   template<typename OrdinalType>
01803   void BLAS<OrdinalType, complex<double> >::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, const complex<double> *B, const OrdinalType ldb, const complex<double> beta, complex<double> *C, const OrdinalType ldc) const
01804   { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
01805   
01806   template<typename OrdinalType>
01807   void BLAS<OrdinalType, complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, complex<double>* B, const OrdinalType ldb) const
01808   { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01809 
01810   template<typename OrdinalType>
01811   void BLAS<OrdinalType, complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const complex<double> alpha, const complex<double>* A, const OrdinalType lda, complex<double>* B, const OrdinalType ldb) const
01812   { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
01813   
01814 #endif // HAVE_TEUCHOS_COMPLEX
01815 
01816 #endif // DOXYGEN_SHOULD_SKIP_THIS
01817 
01818 } // namespace Teuchos
01819 
01820 #endif // _TEUCHOS_BLAS_HPP_

Generated on Thu Sep 18 12:41:17 2008 for Teuchos - Trilinos Tools Package by doxygen 1.3.9.1