Teuchos Package Browser (Single Doxygen Collection) Version of the Day
Teuchos_BLAS.cpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #include "Teuchos_BLAS.hpp"
00043 #include "Teuchos_BLAS_wrappers.hpp"
00044 
00045 /* for INTEL_CXML, the second arg may need to be changed to 'one'.  If so
00046 the appropriate declaration of one will need to be added back into
00047 functions that include the macro:
00048 */
00049 
00050 namespace {
00051 #if defined (INTEL_CXML)
00052         unsigned int one=1;
00053 #endif
00054 } // namespace
00055 
00056 #ifdef CHAR_MACRO
00057 #undef CHAR_MACRO
00058 #endif
00059 #if defined (INTEL_CXML)
00060 #define CHAR_MACRO(char_var) &char_var, one
00061 #else
00062 #define CHAR_MACRO(char_var) &char_var
00063 #endif
00064 
00065 
00066 const char Teuchos::ESideChar[] = {'L' , 'R' };
00067 const char Teuchos::ETranspChar[] = {'N' , 'T' , 'C' };
00068 const char Teuchos::EUploChar[] = {'U' , 'L' };
00069 const char Teuchos::EDiagChar[] = {'U' , 'N' };
00070 const char Teuchos::ETypeChar[] = {'G' , 'L', 'U', 'H', 'B', 'Q', 'Z' };
00071 //const char Teuchos::EFactChar[] = {'F', 'N' };
00072 //const char Teuchos::ENormChar[] = {'O', 'I' };
00073 //const char Teuchos::ECompQChar[] = {'N', 'I', 'V' };
00074 //const char Teuchos::EJobChar[] = {'E', 'V', 'B' };
00075 //const char Teuchos::EJobSChar[] = {'E', 'S' };
00076 //const char Teuchos::EJobVSChar[] = {'V', 'N' };
00077 //const char Teuchos::EHowmnyChar[] = {'A', 'S' };
00078 //const char Teuchos::ECMachChar[] = {'E', 'S', 'B', 'P', 'N', 'R', 'M', 'U', 'L', 'O' };
00079 //const char Teuchos::ESortChar[] = {'N', 'S'};
00080 
00081 
00082 namespace {
00083 
00084 
00085 template<typename Scalar>
00086 Scalar generic_dot(const int n, const Scalar* x, const int incx,
00087   const Scalar* y, const int incy)
00088 {
00089   typedef Teuchos::ScalarTraits<Scalar> ST;
00090   Scalar dot = 0.0;
00091   if (incx==1 && incy==1) {
00092     for (int i = 0; i < n; ++i)
00093       dot += (*x++)*ST::conjugate(*y++);
00094   }
00095   else {
00096     if (incx < 0)
00097       x = x - incx*(n-1);
00098     if (incy < 0)
00099       y = y - incy*(n-1);
00100     for (int i = 0; i < n; ++i, x+=incx, y+=incy)
00101       dot += (*x)*ST::conjugate(*y);
00102   }
00103   return dot;
00104 }
00105 
00106 
00107 } // namespace
00108 
00109 
00110 namespace Teuchos {
00111 
00112 //Explicitly instantiating these templates for windows due to an issue with
00113 //resolving them when linking dlls.
00114 #ifdef _WIN32
00115 #  ifdef HAVE_TEUCHOS_COMPLEX
00116      template BLAS<long int, std::complex<float> >;
00117      template BLAS<long int, std::complex<double> >;
00118 #  endif
00119      template BLAS<long int, float>;
00120      template BLAS<long int, double>;
00121 #endif
00122 
00123   // *************************** BLAS<int,float> DEFINITIONS ******************************
00124 
00125   void BLAS<int, float>::ROTG(float* da, float* db, float* c, float* s) const
00126   { SROTG_F77(da, db, c, s ); }
00127 
00128   void BLAS<int, float>::ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const
00129   { SROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00130 
00131 
00132   float BLAS<int, float>::ASUM(const int n, const float* x, const int incx) const
00133   {
00134 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00135     return cblas_sasum(n, x, incx);
00136 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00137     float tmp = SASUM_F77(&n, x, &incx);
00138     return tmp;
00139 #else
00140     typedef ScalarTraits<float> ST;
00141     float sum = 0.0;
00142     if (incx == 1) {
00143       for (int i = 0; i < n; ++i)
00144         sum += ST::magnitude(*x++);
00145     }
00146     else {
00147       for (int i = 0; i < n; ++i, x+=incx)
00148         sum += ST::magnitude(*x);
00149     }
00150     return sum;
00151 #endif
00152   }
00153 
00154   void BLAS<int, float>::AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const
00155   { SAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00156 
00157   void BLAS<int, float>::COPY(const int n, const float* x, const int incx, float* y, const int incy) const
00158   { SCOPY_F77(&n, x, &incx, y, &incy); }
00159 
00160   float BLAS<int, float>::DOT(const int n, const float* x, const int incx, const float* y, const int incy) const
00161   {
00162 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00163     return cblas_sdot(n, x, incx, y, incy);
00164 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00165     return SDOT_F77(&n, x, &incx, y, &incy);
00166 #else
00167     return generic_dot(n, x, incx, y, incy);
00168 #endif
00169   }
00170 
00171   int BLAS<int, float>::IAMAX(const int n, const float* x, const int incx) const
00172   { return ISAMAX_F77(&n, x, &incx); }
00173 
00174   float BLAS<int, float>::NRM2(const int n, const float* x, const int incx) const
00175   {
00176 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00177     return cblas_snrm2(n, x, incx);
00178 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00179     return SNRM2_F77(&n, x, &incx);
00180 #else
00181     return ScalarTraits<float>::squareroot(generic_dot(n, x, incx, x, incx));
00182 #endif
00183   }
00184 
00185   void BLAS<int, float>::SCAL(const int n, const float alpha, float* x, const int incx) const
00186   { SSCAL_F77(&n, &alpha, x, &incx); }
00187 
00188   void BLAS<int, float>::GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const
00189   { SGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00190 
00191   void BLAS<int, float>::GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const
00192   { SGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00193 
00194   void BLAS<int, float>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const
00195   { STRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00196 
00197   void BLAS<int, float>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
00198   { SGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00199 
00200   void BLAS<int, float>::SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const
00201   { SSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00202 
00203   void BLAS<int, float>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const float alpha, const float* A, const int lda, const float beta, float* C, const int ldc) const
00204   { SSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00205 
00206   void BLAS<int, float>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
00207   { STRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00208 
00209   void BLAS<int, float>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const
00210   { STRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00211 
00212   // *************************** BLAS<int,double> DEFINITIONS ******************************
00213 
00214   void BLAS<int, double>::ROTG(double* da, double* db, double* c, double* s) const
00215   { DROTG_F77(da, db, c, s); }
00216 
00217   void BLAS<int, double>::ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const
00218   { DROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00219 
00220   double BLAS<int, double>::ASUM(const int n, const double* x, const int incx) const
00221   { return DASUM_F77(&n, x, &incx); }
00222 
00223   void BLAS<int, double>::AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const
00224   { DAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00225 
00226   void BLAS<int, double>::COPY(const int n, const double* x, const int incx, double* y, const int incy) const
00227   { DCOPY_F77(&n, x, &incx, y, &incy); }
00228 
00229   double BLAS<int, double>::DOT(const int n, const double* x, const int incx, const double* y, const int incy) const
00230   {
00231     return DDOT_F77(&n, x, &incx, y, &incy);
00232   }
00233 
00234   int BLAS<int, double>::IAMAX(const int n, const double* x, const int incx) const
00235   { return IDAMAX_F77(&n, x, &incx); }
00236 
00237   double BLAS<int, double>::NRM2(const int n, const double* x, const int incx) const
00238   { return DNRM2_F77(&n, x, &incx); }
00239 
00240   void BLAS<int, double>::SCAL(const int n, const double alpha, double* x, const int incx) const
00241   { DSCAL_F77(&n, &alpha, x, &incx); }
00242 
00243   void BLAS<int, double>::GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const
00244   { DGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00245 
00246   void BLAS<int, double>::GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const
00247   { DGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00248 
00249   void BLAS<int, double>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const
00250   { DTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00251 
00252   void BLAS<int, double>::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const
00253   { DGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00254 
00255   void BLAS<int, double>::SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const
00256   { DSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00257 
00258   void BLAS<int, double>::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const double alpha, const double* A, const int lda, const double beta, double* C, const int ldc) const
00259   { DSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00260 
00261   void BLAS<int, double>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
00262   { DTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00263 
00264   void BLAS<int, double>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const
00265   { DTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00266 
00267 #ifdef HAVE_TEUCHOS_COMPLEX
00268 
00269   // *************************** BLAS<int,std::complex<float> > DEFINITIONS ******************************
00270 
00271   void BLAS<int, std::complex<float> >::ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const
00272   { CROTG_F77(da, db, c, s ); }
00273 
00274   void BLAS<int, std::complex<float> >::ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const
00275   { CROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00276 
00277   float BLAS<int, std::complex<float> >::ASUM(const int n, const std::complex<float>* x, const int incx) const
00278   {
00279 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00280     return cblas_scasum(n, x, incx);
00281 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
00282     return (float) SCASUM_F77(&n, x, &incx);
00283 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00284     return SCASUM_F77(&n, x, &incx);
00285 #else // Wow, you just plain don't have this routine.
00286     // mfh 01 Feb 2013: See www.netlib.org/blas/scasum.f.
00287     // I've enhanced this by accumulating in double precision.
00288     double result = 0;
00289     if (incx == 1) {
00290       for (int i = 0; i < n; ++i) {
00291   result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
00292       }
00293     } else {
00294       const int nincx = n * incx;
00295       for (int i = 0; i < nincx; i += incx) {
00296   result += std::abs (std::real (x[i])) + std::abs (std::imag (x[i]));
00297       }
00298     }
00299     return static_cast<float> (result);
00300 #endif
00301   }
00302 
00303   void BLAS<int, std::complex<float> >::AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
00304   { CAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00305 
00306   void BLAS<int, std::complex<float> >::COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const
00307   { CCOPY_F77(&n, x, &incx, y, &incy); }
00308 
00309   std::complex<float> BLAS<int, std::complex<float> >::DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const
00310   {
00311 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00312     std::complex<float> z;
00313     cblas_cdotc_sub(n,x,incx,y,incy,&z);
00314     return z;
00315 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM) && defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00316     std::complex<float> z;
00317     CDOT_F77(&z, &n, x, &incx, y, &incy);
00318     return z;
00319 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00320     return CDOT_F77(&n, x, &incx, y, &incy);
00321 #else // Wow, you just plain don't have this routine.
00322     // mfh 01 Feb 2013: See www.netlib.org/blas/cdotc.f.
00323     // I've enhanced this by accumulating in double precision.
00324     std::complex<double> result (0, 0);
00325     if (n >= 0) {
00326       if (incx == 1 && incy == 1) {
00327   for (int i = 0; i < n; ++i) {
00328     result += std::conj (x[i]) * y[i];
00329   }
00330       } else {
00331   int ix = 0;
00332   int iy = 0;
00333   if (incx < 0) {
00334     ix = (1-n) * incx;
00335   }
00336   if (incy < 0) {
00337     iy = (1-n) * incy;    
00338   }
00339   for (int i = 0; i < n; ++i) {
00340     result += std::conj (x[ix]) * y[iy];
00341     ix += incx;
00342     iy += incy;
00343   }
00344       }
00345     }
00346     return static_cast<std::complex<float> > (result);
00347 #endif
00348   }
00349 
00350   int BLAS<int, std::complex<float> >::IAMAX(const int n, const std::complex<float>* x, const int incx) const
00351   { return ICAMAX_F77(&n, x, &incx); }
00352 
00353   float BLAS<int, std::complex<float> >::NRM2(const int n, const std::complex<float>* x, const int incx) const
00354   {
00355 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00356     return cblas_scnrm2(n, x, incx);
00357 #elif defined(HAVE_TEUCHOS_BLASFLOAT_DOUBLE_RETURN)
00358     return (float) SCNRM2_F77(&n, x, &incx);
00359 #elif defined(HAVE_TEUCHOS_BLASFLOAT)
00360     return SCNRM2_F77(&n, x, &incx);
00361 #else // Wow, you just plain don't have this routine.
00362     // mfh 01 Feb 2013: See www.netlib.org/blas/scnrm2.f.
00363     // I've enhanced this by accumulating in double precision.
00364     if (n < 1 || incx < 1) {
00365       return 0;
00366     } else {
00367       double scale = 0;
00368       double ssq = 1;
00369 
00370       const int upper = 1 + (n-1)*incx;
00371       for (int ix = 0; ix < upper; ix += incx) {
00372   // The reference BLAS implementation cleverly scales the
00373   // intermediate result. so that even if the square of the norm
00374   // would overflow, computing the norm itself does not.  Hence,
00375   // "ssq" for "scaled square root."
00376   if (std::real (x[ix]) != 0) {
00377     const double temp = std::abs (std::real (x[ix]));
00378     if (scale < temp) {
00379       const double scale_over_temp = scale / temp;
00380       ssq = 1 + ssq * scale_over_temp*scale_over_temp;
00381       // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
00382       scale = temp;
00383     } else {
00384       const double temp_over_scale = temp / scale;
00385       ssq = ssq + temp_over_scale*temp_over_scale;
00386     }
00387   }
00388   if (std::imag (x[ix]) != 0) {
00389     const double temp = std::abs (std::imag (x[ix]));
00390     if (scale < temp) {
00391       const double scale_over_temp = scale / temp;
00392       ssq = 1 + ssq * scale_over_temp*scale_over_temp;
00393       // New scaling factor: biggest (in magnitude) real or imaginary part seen thus far.
00394       scale = temp;
00395     } else {
00396       const double temp_over_scale = temp / scale;
00397       ssq = ssq + temp_over_scale*temp_over_scale;
00398     }
00399   }
00400       }
00401       return static_cast<float> (scale * std::sqrt (ssq));
00402     }
00403 #endif
00404   }
00405 
00406   void BLAS<int, std::complex<float> >::SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const
00407   { CSCAL_F77(&n, &alpha, x, &incx); }
00408 
00409   void BLAS<int, std::complex<float> >::GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const
00410   { CGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00411 
00412   void BLAS<int, std::complex<float> >::GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const
00413   { CGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00414 
00415   void BLAS<int, std::complex<float> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const
00416   { CTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00417 
00418   void BLAS<int, std::complex<float> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00419   { CGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00420 
00421   void BLAS<int, std::complex<float> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00422   { CSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00423 
00424   void BLAS<int, std::complex<float> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> beta, std::complex<float>* C, const int ldc) const
00425   { CSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00426 
00427   void BLAS<int, std::complex<float> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
00428   { CTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00429 
00430   void BLAS<int, std::complex<float> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const
00431   { CTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00432 
00433   // *************************** BLAS<int,std::complex<double> > DEFINITIONS ******************************
00434 
00435   void BLAS<int, std::complex<double> >::ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const
00436   { ZROTG_F77(da, db, c, s); }
00437 
00438   void BLAS<int, std::complex<double> >::ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const
00439   { ZROT_F77(&n, dx, &incx, dy, &incy, c, s); }
00440 
00441   double BLAS<int, std::complex<double> >::ASUM(const int n, const std::complex<double>* x, const int incx) const
00442   { return ZASUM_F77(&n, x, &incx); }
00443 
00444   void BLAS<int, std::complex<double> >::AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
00445   { ZAXPY_F77(&n, &alpha, x, &incx, y, &incy); }
00446 
00447   void BLAS<int, std::complex<double> >::COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const
00448   { ZCOPY_F77(&n, x, &incx, y, &incy); }
00449 
00450   std::complex<double> BLAS<int, std::complex<double> >::DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const
00451   {
00452 #if defined(HAVE_TEUCHOS_BLASFLOAT_APPLE_VECLIB_BUGFIX)
00453     std::complex<double> z;
00454     cblas_zdotc_sub(n,x,incx,y,incy,&z);
00455     return z;
00456 #elif defined(HAVE_COMPLEX_BLAS_PROBLEM)
00457 #  if defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00458     std::complex<double> z;
00459     ZDOT_F77(&z, &n, x, &incx, y, &incy);
00460     return z;
00461 #  else 
00462     // mfh 01 Feb 2013: Your complex BLAS is broken, but the problem
00463     // doesn't have the easy workaround.  I'll just reimplement the
00464     // missing routine here.  See www.netlib.org/blas/zdotc.f.
00465     std::complex<double> ztemp (0, 0);
00466     if (n > 0) {
00467       if (incx == 1 && incy == 1) {
00468   for (int i = 0; i < n; ++i) {
00469     ztemp += std::conj (x[i]) * y[i];
00470   }
00471       } else {
00472   int ix = 0;
00473   int iy = 0;
00474   if (incx < 0) {
00475     ix = (1-n)*incx;
00476   }
00477   if (incy < 0) {
00478     iy = (1-n)*incy;
00479   }
00480   for (int i = 0; i < n; ++i) {
00481     ztemp += std::conj (x[ix]) * y[iy];
00482     ix += incx;
00483     iy += incy;
00484   }
00485       }
00486     }
00487     return ztemp;
00488 
00489 #  endif // defined(HAVE_FIXABLE_COMPLEX_BLAS_PROBLEM)
00490 #else
00491     return ZDOT_F77(&n, x, &incx, y, &incy);
00492 #endif
00493   }
00494 
00495   int BLAS<int, std::complex<double> >::IAMAX(const int n, const std::complex<double>* x, const int incx) const
00496   { return IZAMAX_F77(&n, x, &incx); }
00497 
00498   double BLAS<int, std::complex<double> >::NRM2(const int n, const std::complex<double>* x, const int incx) const
00499   { return ZNRM2_F77(&n, x, &incx); }
00500 
00501   void BLAS<int, std::complex<double> >::SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const
00502   { ZSCAL_F77(&n, &alpha, x, &incx); }
00503 
00504   void BLAS<int, std::complex<double> >::GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const
00505   { ZGEMV_F77(CHAR_MACRO(ETranspChar[trans]), &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy); }
00506 
00507   void BLAS<int, std::complex<double> >::GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const
00508   { ZGER_F77(&m, &n, &alpha, x, &incx, y, &incy, A, &lda); }
00509 
00510   void BLAS<int, std::complex<double> >::TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const
00511   { ZTRMV_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), CHAR_MACRO(EDiagChar[diag]), &n, A, &lda, x, &incx); }
00512 
00513   void BLAS<int, std::complex<double> >::GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
00514   { ZGEMM_F77(CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(ETranspChar[transb]), &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00515 
00516   void BLAS<int, std::complex<double> >::SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const
00517   { ZSYMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), &m, &n, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
00518 
00519   void BLAS<int, std::complex<double> >::SYRK(EUplo uplo, ETransp trans, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> beta, std::complex<double>* C, const int ldc) const
00520   { ZSYRK_F77(CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[trans]), &n, &k, &alpha, A, &lda, &beta, C, &ldc); }
00521 
00522   void BLAS<int, std::complex<double> >::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
00523   { ZTRMM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00524 
00525   void BLAS<int, std::complex<double> >::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const
00526   { ZTRSM_F77(CHAR_MACRO(ESideChar[side]), CHAR_MACRO(EUploChar[uplo]), CHAR_MACRO(ETranspChar[transa]), CHAR_MACRO(EDiagChar[diag]), &m, &n, &alpha, A, &lda, B, &ldb); }
00527 
00528 #endif // HAVE_TEUCHOS_COMPLEX
00529 
00530 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines