00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #ifndef _TEUCHOS_BLAS_HPP_
00047 #define _TEUCHOS_BLAS_HPP_
00048
00056
00057
00058
00059
00060 #if defined (INTEL_CXML)
00061 unsigned int one=1;
00062 #endif
00063
00064 #ifdef CHAR_MACRO
00065 #undef CHAR_MACRO
00066 #endif
00067 #if defined (INTEL_CXML)
00068 #define CHAR_MACRO(char_var) &char_var, one
00069 #else
00070 #define CHAR_MACRO(char_var) &char_var
00071 #endif
00072
00073 #include "Teuchos_ConfigDefs.hpp"
00074 #include "Teuchos_ScalarTraits.hpp"
00075 #include "Teuchos_OrdinalTraits.hpp"
00076 #include "Teuchos_BLAS_types.hpp"
00077 #include "Teuchos_TestForException.hpp"
00078
00112 namespace Teuchos
00113 {
00114 extern TEUCHOS_LIB_DLL_EXPORT const char ESideChar[];
00115 extern TEUCHOS_LIB_DLL_EXPORT const char ETranspChar[];
00116 extern TEUCHOS_LIB_DLL_EXPORT const char EUploChar[];
00117 extern TEUCHOS_LIB_DLL_EXPORT const char EDiagChar[];
00118
00120
00125 template<typename OrdinalType, typename ScalarType>
00126 class DefaultBLASImpl
00127 {
00128
00129 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00130
00131 public:
00133
00134
00136 inline DefaultBLASImpl(void) {}
00137
00139 inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& ) {}
00140
00142 inline virtual ~DefaultBLASImpl(void) {}
00144
00146
00147
00149 void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00150
00152 void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00153
00155 void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00156
00158 void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00159
00161 template <typename alpha_type, typename x_type>
00162 void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00163
00165 typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00166
00168 template <typename x_type, typename y_type>
00169 ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
00170
00172 typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00173
00175 OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00176
00178
00180
00181
00183 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00184 void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
00185 const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
00186
00188 template <typename A_type>
00189 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
00190 const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00191
00193 template <typename alpha_type, typename x_type, typename y_type>
00194 void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
00195 const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00197
00199
00200
00202 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00203 void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00204
00206 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00207 void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00208
00210 template <typename alpha_type, typename A_type>
00211 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00212 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00213
00215 template <typename alpha_type, typename A_type>
00216 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00217 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00219 };
00220
00221 template<typename OrdinalType, typename ScalarType>
00222 class TEUCHOS_LIB_DLL_EXPORT BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
00223 {
00224
00225 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00226
00227 public:
00229
00230
00232 inline BLAS(void) {}
00233
00235 inline BLAS(const BLAS<OrdinalType, ScalarType>& ) {}
00236
00238 inline virtual ~BLAS(void) {}
00240 };
00241
00242
00243
00244
00245
00246 template<typename OrdinalType, typename ScalarType>
00247 void DefaultBLASImpl<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
00248 {
00249 ScalarType roe, alpha;
00250 ScalarType zero = ScalarTraits<ScalarType>::zero();
00251 ScalarType one = ScalarTraits<ScalarType>::one();
00252 MagnitudeType scale, norm;
00253 MagnitudeType m_one = ScalarTraits<MagnitudeType>::one();
00254 MagnitudeType m_zero = ScalarTraits<MagnitudeType>::zero();
00255
00256 roe = *db;
00257 if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
00258 scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
00259 if ( scale == m_zero )
00260 {
00261 *c = m_one;
00262 *s = zero;
00263 *da = zero; *db = zero;
00264 }
00265 else if ( *da == zero )
00266 {
00267 *c = m_zero;
00268 *s = one;
00269 *da = *db; *db = zero;
00270 }
00271 else
00272 {
00273 norm = scale*ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) ) );
00274 alpha = roe / ScalarTraits<ScalarType>::magnitude(roe);
00275 *c = ScalarTraits<ScalarType>::magnitude(*da) / norm;
00276 *s = alpha * ScalarTraits<ScalarType>::conjugate(*db) / norm;
00277 *db = one;
00278 if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
00279 if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
00280 *c != ScalarTraits<MagnitudeType>::zero() ) { *db = one / *c; }
00281 *da = norm * alpha;
00282 }
00283 }
00284
00285 template<typename OrdinalType, typename ScalarType>
00286 void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00287 {
00288 ScalarType sconj = Teuchos::ScalarTraits<ScalarType>::conjugate(*s);
00289 if (n <= 0) return;
00290 if (incx==1 && incy==1) {
00291 for(OrdinalType i=0; i<n; ++i) {
00292 ScalarType temp = *c*dx[i] + sconj*dy[i];
00293 dy[i] = *c*dy[i] - sconj*dx[i];
00294 dx[i] = temp;
00295 }
00296 }
00297 else {
00298 OrdinalType ix = 0, iy = 0;
00299 if (incx < 0) ix = (-n+1)*incx;
00300 if (incy < 0) iy = (-n+1)*incy;
00301 for(OrdinalType i=0; i<n; ++i) {
00302 ScalarType temp = *c*dx[ix] + sconj*dy[iy];
00303 dy[iy] = *c*dy[iy] - sconj*dx[ix];
00304 dx[ix] = temp;
00305 ix += incx;
00306 iy += incy;
00307 }
00308 }
00309 }
00310
00311 template<typename OrdinalType, typename ScalarType>
00312 void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00313 {
00314 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00315 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00316 OrdinalType i, ix = izero;
00317 if ( n > izero ) {
00318
00319 if (incx < izero) { ix = (-n+ione)*incx; }
00320
00321 for(i = izero; i < n; i++)
00322 {
00323 x[ix] *= alpha;
00324 ix += incx;
00325 }
00326 }
00327 }
00328
00329 template<typename OrdinalType, typename ScalarType>
00330 void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00331 {
00332 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00333 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00334 OrdinalType i, ix = izero, iy = izero;
00335 if ( n > izero ) {
00336
00337 if (incx < izero) { ix = (-n+ione)*incx; }
00338 if (incy < izero) { iy = (-n+ione)*incy; }
00339
00340 for(i = izero; i < n; i++)
00341 {
00342 y[iy] = x[ix];
00343 ix += incx;
00344 iy += incy;
00345 }
00346 }
00347 }
00348
00349 template<typename OrdinalType, typename ScalarType>
00350 template <typename alpha_type, typename x_type>
00351 void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00352 {
00353 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00354 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00355 OrdinalType i, ix = izero, iy = izero;
00356 if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
00357 {
00358
00359 if (incx < izero) { ix = (-n+ione)*incx; }
00360 if (incy < izero) { iy = (-n+ione)*incy; }
00361
00362 for(i = izero; i < n; i++)
00363 {
00364 y[iy] += alpha * x[ix];
00365 ix += incx;
00366 iy += incy;
00367 }
00368 }
00369 }
00370
00371 template<typename OrdinalType, typename ScalarType>
00372 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00373 {
00374 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00375 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00376 typename ScalarTraits<ScalarType>::magnitudeType result =
00377 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00378 OrdinalType i, ix = izero;
00379 if( n > izero ) {
00380
00381 if (incx < izero) { ix = (-n+ione)*incx; }
00382
00383 for(i = izero; i < n; i++)
00384 {
00385 result += ScalarTraits<ScalarType>::magnitude(x[ix]);
00386 ix += incx;
00387 }
00388 }
00389 return result;
00390 }
00391
00392 template<typename OrdinalType, typename ScalarType>
00393 template <typename x_type, typename y_type>
00394 ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
00395 {
00396 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00397 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00398 ScalarType result = ScalarTraits<ScalarType>::zero();
00399 OrdinalType i, ix = izero, iy = izero;
00400 if( n > izero )
00401 {
00402
00403 if (incx < izero) { ix = (-n+ione)*incx; }
00404 if (incy < izero) { iy = (-n+ione)*incy; }
00405
00406 for(i = izero; i < n; i++)
00407 {
00408 result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
00409 ix += incx;
00410 iy += incy;
00411 }
00412 }
00413 return result;
00414 }
00415
00416 template<typename OrdinalType, typename ScalarType>
00417 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00418 {
00419 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00420 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00421 typename ScalarTraits<ScalarType>::magnitudeType result =
00422 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00423 OrdinalType i, ix = izero;
00424 if ( n > izero )
00425 {
00426
00427 if (incx < izero) { ix = (-n+ione)*incx; }
00428
00429 for(i = izero; i < n; i++)
00430 {
00431 result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]);
00432 ix += incx;
00433 }
00434 result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result);
00435 }
00436 return result;
00437 }
00438
00439 template<typename OrdinalType, typename ScalarType>
00440 OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00441 {
00442 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00443 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00444 OrdinalType result = izero, ix = izero, i;
00445 typename ScalarTraits<ScalarType>::magnitudeType maxval =
00446 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00447
00448 if ( n > izero )
00449 {
00450 if (incx < izero) { ix = (-n+ione)*incx; }
00451 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00452 ix += incx;
00453 for(i = ione; i < n; i++)
00454 {
00455 if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval)
00456 {
00457 result = i;
00458 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00459 }
00460 ix += incx;
00461 }
00462 }
00463 return result + 1;
00464 }
00465
00466
00467
00468
00469 template<typename OrdinalType, typename ScalarType>
00470 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00471 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
00472 {
00473 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00474 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00475 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00476 beta_type beta_zero = ScalarTraits<beta_type>::zero();
00477 x_type x_zero = ScalarTraits<x_type>::zero();
00478 ScalarType y_zero = ScalarTraits<ScalarType>::zero();
00479 beta_type beta_one = ScalarTraits<beta_type>::one();
00480 bool BadArgument = false;
00481
00482 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
00483 "Teuchos::BLAS::GEMV() does not currently support CONJ_TRANS for complex data types.");
00484
00485
00486 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
00487
00488
00489 if( m < izero ) {
00490 std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
00491 BadArgument = true;
00492 }
00493 if( n < izero ) {
00494 std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
00495 BadArgument = true;
00496 }
00497 if( lda < m ) {
00498 std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
00499 BadArgument = true;
00500 }
00501 if( incx == izero ) {
00502 std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00503 BadArgument = true;
00504 }
00505 if( incy == izero ) {
00506 std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00507 BadArgument = true;
00508 }
00509
00510 if(!BadArgument) {
00511 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
00512 OrdinalType kx = izero, ky = izero;
00513 ScalarType temp;
00514
00515
00516 if(ETranspChar[trans] == 'N') {
00517 lenx = n;
00518 leny = m;
00519 } else {
00520 lenx = m;
00521 leny = n;
00522 }
00523
00524
00525 if (incx < izero ) { kx = (ione - lenx)*incx; }
00526 if (incy < izero ) { ky = (ione - leny)*incy; }
00527
00528
00529 ix = kx; iy = ky;
00530 if(beta != beta_one) {
00531 if (incy == ione) {
00532 if (beta == beta_zero) {
00533 for(i = izero; i < leny; i++) { y[i] = y_zero; }
00534 } else {
00535 for(i = izero; i < leny; i++) { y[i] *= beta; }
00536 }
00537 } else {
00538 if (beta == beta_zero) {
00539 for(i = izero; i < leny; i++) {
00540 y[iy] = y_zero;
00541 iy += incy;
00542 }
00543 } else {
00544 for(i = izero; i < leny; i++) {
00545 y[iy] *= beta;
00546 iy += incy;
00547 }
00548 }
00549 }
00550 }
00551
00552
00553 if(alpha == alpha_zero) { return; }
00554
00555 if( ETranspChar[trans] == 'N' ) {
00556
00557 jx = kx;
00558 if (incy == ione) {
00559 for(j = izero; j < n; j++) {
00560 if (x[jx] != x_zero) {
00561 temp = alpha*x[jx];
00562 for(i = izero; i < m; i++) {
00563 y[i] += temp*A[j*lda + i];
00564 }
00565 }
00566 jx += incx;
00567 }
00568 } else {
00569 for(j = izero; j < n; j++) {
00570 if (x[jx] != x_zero) {
00571 temp = alpha*x[jx];
00572 iy = ky;
00573 for(i = izero; i < m; i++) {
00574 y[iy] += temp*A[j*lda + i];
00575 iy += incy;
00576 }
00577 }
00578 jx += incx;
00579 }
00580 }
00581 } else {
00582 jy = ky;
00583 if (incx == ione) {
00584 for(j = izero; j < n; j++) {
00585 temp = y_zero;
00586 for(i = izero; i < m; i++) {
00587 temp += A[j*lda + i]*x[i];
00588 }
00589 y[jy] += alpha*temp;
00590 jy += incy;
00591 }
00592 } else {
00593 for(j = izero; j < n; j++) {
00594 temp = y_zero;
00595 ix = kx;
00596 for (i = izero; i < m; i++) {
00597 temp += A[j*lda + i]*x[ix];
00598 ix += incx;
00599 }
00600 y[jy] += alpha*temp;
00601 jy += incy;
00602 }
00603 }
00604 }
00605 }
00606 }
00607
00608 template<typename OrdinalType, typename ScalarType>
00609 template <typename A_type>
00610 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00611 {
00612 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00613 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00614 ScalarType zero = ScalarTraits<ScalarType>::zero();
00615 bool BadArgument = false;
00616
00617 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
00618 "Teuchos::BLAS::TRMV() does not currently support CONJ_TRANS for complex data types.");
00619
00620
00621 if( n == izero ){ return; }
00622
00623
00624 if( n < izero ) {
00625 std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
00626 BadArgument = true;
00627 }
00628 if( lda < n ) {
00629 std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
00630 BadArgument = true;
00631 }
00632 if( incx == izero ) {
00633 std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00634 BadArgument = true;
00635 }
00636
00637 if(!BadArgument) {
00638 OrdinalType i, j, ix, jx, kx = izero;
00639 ScalarType temp;
00640 bool NoUnit = (EDiagChar[diag] == 'N');
00641
00642
00643 if (incx < izero) { kx = (-n+ione)*incx; }
00644
00645
00646 if (ETranspChar[trans] == 'N') {
00647
00648 if (EUploChar[uplo] == 'U') {
00649
00650 if (incx == ione) {
00651 for (j=izero; j<n; j++) {
00652 if (x[j] != zero) {
00653 temp = x[j];
00654 for (i=izero; i<j; i++) {
00655 x[i] += temp*A[j*lda + i];
00656 }
00657 if (NoUnit)
00658 x[j] *= A[j*lda + j];
00659 }
00660 }
00661 } else {
00662 jx = kx;
00663 for (j=izero; j<n; j++) {
00664 if (x[jx] != zero) {
00665 temp = x[jx];
00666 ix = kx;
00667 for (i=izero; i<j; i++) {
00668 x[ix] += temp*A[j*lda + i];
00669 ix += incx;
00670 }
00671 if (NoUnit)
00672 x[jx] *= A[j*lda + j];
00673 }
00674 jx += incx;
00675 }
00676 }
00677 } else {
00678 if (incx == ione) {
00679 for (j=n-ione; j>-ione; j--) {
00680 if (x[j] != zero) {
00681 temp = x[j];
00682 for (i=n-ione; i>j; i--) {
00683 x[i] += temp*A[j*lda + i];
00684 }
00685 if (NoUnit)
00686 x[j] *= A[j*lda + j];
00687 }
00688 }
00689 } else {
00690 kx += (n-ione)*incx;
00691 jx = kx;
00692 for (j=n-ione; j>-ione; j--) {
00693 if (x[jx] != zero) {
00694 temp = x[jx];
00695 ix = kx;
00696 for (i=n-ione; i>j; i--) {
00697 x[ix] += temp*A[j*lda + i];
00698 ix -= incx;
00699 }
00700 if (NoUnit)
00701 x[jx] *= A[j*lda + j];
00702 }
00703 jx -= incx;
00704 }
00705 }
00706 }
00707 } else {
00708
00709 if (EUploChar[uplo]=='U') {
00710
00711 if (incx == ione) {
00712 for (j=n-ione; j>-ione; j--) {
00713 temp = x[j];
00714 if (NoUnit)
00715 temp *= A[j*lda + j];
00716 for (i=j-ione; i>-ione; i--) {
00717 temp += A[j*lda + i]*x[i];
00718 }
00719 x[j] = temp;
00720 }
00721 } else {
00722 jx = kx + (n-ione)*incx;
00723 for (j=n-ione; j>-ione; j--) {
00724 temp = x[jx];
00725 ix = jx;
00726 if (NoUnit)
00727 temp *= A[j*lda + j];
00728 for (i=j-ione; i>-ione; i--) {
00729 ix -= incx;
00730 temp += A[j*lda + i]*x[ix];
00731 }
00732 x[jx] = temp;
00733 jx -= incx;
00734 }
00735 }
00736 } else {
00737
00738 if (incx == ione) {
00739 for (j=izero; j<n; j++) {
00740 temp = x[j];
00741 if (NoUnit)
00742 temp *= A[j*lda + j];
00743 for (i=j+ione; i<n; i++) {
00744 temp += A[j*lda + i]*x[i];
00745 }
00746 x[j] = temp;
00747 }
00748 } else {
00749 jx = kx;
00750 for (j=izero; j<n; j++) {
00751 temp = x[jx];
00752 ix = jx;
00753 if (NoUnit)
00754 temp *= A[j*lda + j];
00755 for (i=j+ione; i<n; i++) {
00756 ix += incx;
00757 temp += A[j*lda + i]*x[ix];
00758 }
00759 x[jx] = temp;
00760 jx += incx;
00761 }
00762 }
00763 }
00764 }
00765 }
00766 }
00767
00768 template<typename OrdinalType, typename ScalarType>
00769 template <typename alpha_type, typename x_type, typename y_type>
00770 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
00771 {
00772 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00773 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00774 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00775 y_type y_zero = ScalarTraits<y_type>::zero();
00776 bool BadArgument = false;
00777
00778 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
00779 "Teuchos::BLAS::GER() does not currently support complex data types.");
00780
00781
00782 if( m == izero || n == izero || alpha == alpha_zero ){ return; }
00783
00784
00785 if( m < izero ) {
00786 std::cout << "BLAS::GER Error: M == " << m << std::endl;
00787 BadArgument = true;
00788 }
00789 if( n < izero ) {
00790 std::cout << "BLAS::GER Error: N == " << n << std::endl;
00791 BadArgument = true;
00792 }
00793 if( lda < m ) {
00794 std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
00795 BadArgument = true;
00796 }
00797 if( incx == 0 ) {
00798 std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
00799 BadArgument = true;
00800 }
00801 if( incy == 0 ) {
00802 std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
00803 BadArgument = true;
00804 }
00805
00806 if(!BadArgument) {
00807 OrdinalType i, j, ix, jy = izero, kx = izero;
00808 ScalarType temp;
00809
00810
00811 if (incx < izero) { kx = (-m+ione)*incx; }
00812 if (incy < izero) { jy = (-n+ione)*incy; }
00813
00814
00815 if( incx == ione ) {
00816 for( j=izero; j<n; j++ ) {
00817 if ( y[jy] != y_zero ) {
00818 temp = alpha*y[jy];
00819 for ( i=izero; i<m; i++ ) {
00820 A[j*lda + i] += x[i]*temp;
00821 }
00822 }
00823 jy += incy;
00824 }
00825 }
00826 else {
00827 for( j=izero; j<n; j++ ) {
00828 if ( y[jy] != y_zero ) {
00829 temp = alpha*y[jy];
00830 ix = kx;
00831 for( i=izero; i<m; i++ ) {
00832 A[j*lda + i] += x[ix]*temp;
00833 ix += incx;
00834 }
00835 }
00836 jy += incy;
00837 }
00838 }
00839 }
00840 }
00841
00842
00843
00844
00845
00846 template<typename OrdinalType, typename ScalarType>
00847 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00848 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
00849 {
00850
00851 typedef TypeNameTraits<OrdinalType> OTNT;
00852 typedef TypeNameTraits<ScalarType> STNT;
00853
00854 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00855 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00856 beta_type beta_zero = ScalarTraits<beta_type>::zero();
00857 B_type B_zero = ScalarTraits<B_type>::zero();
00858 ScalarType C_zero = ScalarTraits<ScalarType>::zero();
00859 beta_type beta_one = ScalarTraits<beta_type>::one();
00860 OrdinalType i, j, p;
00861 OrdinalType NRowA = m, NRowB = k;
00862 ScalarType temp;
00863 bool BadArgument = false;
00864
00865 TEST_FOR_EXCEPTION(
00866 Teuchos::ScalarTraits<ScalarType>::isComplex
00867 && (transa == CONJ_TRANS || transb == CONJ_TRANS),
00868 std::logic_error,
00869 "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::GEMM()"
00870 " does not currently support CONJ_TRANS for complex data types."
00871 );
00872
00873
00874 if( !(ETranspChar[transa]=='N') ) {
00875 NRowA = k;
00876 }
00877 if( !(ETranspChar[transb]=='N') ) {
00878 NRowB = n;
00879 }
00880
00881
00882 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
00883 if( m < izero ) {
00884 std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
00885 BadArgument = true;
00886 }
00887 if( n < izero ) {
00888 std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
00889 BadArgument = true;
00890 }
00891 if( k < izero ) {
00892 std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
00893 BadArgument = true;
00894 }
00895 if( lda < NRowA ) {
00896 std::cout << "BLAS::GEMM Error: LDA < MAX(1,M)"<< std::endl;
00897 BadArgument = true;
00898 }
00899 if( ldb < NRowB ) {
00900 std::cout << "BLAS::GEMM Error: LDB < MAX(1,K)"<< std::endl;
00901 BadArgument = true;
00902 }
00903 if( ldc < m ) {
00904 std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
00905 BadArgument = true;
00906 }
00907
00908 if(!BadArgument) {
00909
00910
00911 if( alpha == alpha_zero ) {
00912 if( beta == beta_zero ) {
00913 for (j=izero; j<n; j++) {
00914 for (i=izero; i<m; i++) {
00915 C[j*ldc + i] = C_zero;
00916 }
00917 }
00918 } else {
00919 for (j=izero; j<n; j++) {
00920 for (i=izero; i<m; i++) {
00921 C[j*ldc + i] *= beta;
00922 }
00923 }
00924 }
00925 return;
00926 }
00927
00928
00929
00930 if ( ETranspChar[transb]=='N' ) {
00931 if ( ETranspChar[transa]=='N' ) {
00932
00933 for (j=izero; j<n; j++) {
00934 if( beta == beta_zero ) {
00935 for (i=izero; i<m; i++){
00936 C[j*ldc + i] = C_zero;
00937 }
00938 } else if( beta != beta_one ) {
00939 for (i=izero; i<m; i++){
00940 C[j*ldc + i] *= beta;
00941 }
00942 }
00943 for (p=izero; p<k; p++){
00944 if (B[j*ldb + p] != B_zero ){
00945 temp = alpha*B[j*ldb + p];
00946 for (i=izero; i<m; i++) {
00947 C[j*ldc + i] += temp*A[p*lda + i];
00948 }
00949 }
00950 }
00951 }
00952 } else {
00953
00954 for (j=izero; j<n; j++) {
00955 for (i=izero; i<m; i++) {
00956 temp = C_zero;
00957 for (p=izero; p<k; p++) {
00958 temp += A[i*lda + p]*B[j*ldb + p];
00959 }
00960 if (beta == beta_zero) {
00961 C[j*ldc + i] = alpha*temp;
00962 } else {
00963 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00964 }
00965 }
00966 }
00967 }
00968 } else {
00969 if ( ETranspChar[transa]=='N' ) {
00970
00971 for (j=izero; j<n; j++) {
00972 if (beta == beta_zero) {
00973 for (i=izero; i<m; i++) {
00974 C[j*ldc + i] = C_zero;
00975 }
00976 } else if ( beta != beta_one ) {
00977 for (i=izero; i<m; i++) {
00978 C[j*ldc + i] *= beta;
00979 }
00980 }
00981 for (p=izero; p<k; p++) {
00982 if (B[p*ldb + j] != B_zero) {
00983 temp = alpha*B[p*ldb + j];
00984 for (i=izero; i<m; i++) {
00985 C[j*ldc + i] += temp*A[p*lda + i];
00986 }
00987 }
00988 }
00989 }
00990 } else {
00991
00992 for (j=izero; j<n; j++) {
00993 for (i=izero; i<m; i++) {
00994 temp = C_zero;
00995 for (p=izero; p<k; p++) {
00996 temp += A[i*lda + p]*B[p*ldb + j];
00997 }
00998 if (beta == beta_zero) {
00999 C[j*ldc + i] = alpha*temp;
01000 } else {
01001 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
01002 }
01003 }
01004 }
01005 }
01006 }
01007 }
01008 }
01009
01010
01011 template<typename OrdinalType, typename ScalarType>
01012 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
01013 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
01014 {
01015 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01016 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01017 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01018 beta_type beta_zero = ScalarTraits<beta_type>::zero();
01019 ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01020 beta_type beta_one = ScalarTraits<beta_type>::one();
01021 OrdinalType i, j, k, NRowA = m;
01022 ScalarType temp1, temp2;
01023 bool BadArgument = false;
01024 bool Upper = (EUploChar[uplo] == 'U');
01025 if (ESideChar[side] == 'R') { NRowA = n; }
01026
01027 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
01028 "Teuchos::BLAS::SYMM() does not currently support complex data types.");
01029
01030
01031 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
01032 if( m < 0 ) {
01033 std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
01034 BadArgument = true; }
01035 if( n < 0 ) {
01036 std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
01037 BadArgument = true; }
01038 if( lda < NRowA ) {
01039 std::cout << "BLAS::SYMM Error: LDA == "<<lda<<std::endl;
01040 BadArgument = true; }
01041 if( ldb < m ) {
01042 std::cout << "BLAS::SYMM Error: LDB == "<<ldb<<std::endl;
01043 BadArgument = true; }
01044 if( ldc < m ) {
01045 std::cout << "BLAS::SYMM Error: LDC == "<<ldc<<std::endl;
01046 BadArgument = true; }
01047
01048 if(!BadArgument) {
01049
01050
01051 if (alpha == alpha_zero) {
01052 if (beta == beta_zero ) {
01053 for (j=izero; j<n; j++) {
01054 for (i=izero; i<m; i++) {
01055 C[j*ldc + i] = C_zero;
01056 }
01057 }
01058 } else {
01059 for (j=izero; j<n; j++) {
01060 for (i=izero; i<m; i++) {
01061 C[j*ldc + i] *= beta;
01062 }
01063 }
01064 }
01065 return;
01066 }
01067
01068 if ( ESideChar[side] == 'L') {
01069
01070
01071 if (Upper) {
01072
01073 for (j=izero; j<n; j++) {
01074 for (i=izero; i<m; i++) {
01075 temp1 = alpha*B[j*ldb + i];
01076 temp2 = C_zero;
01077 for (k=izero; k<i; k++) {
01078 C[j*ldc + k] += temp1*A[i*lda + k];
01079 temp2 += B[j*ldb + k]*A[i*lda + k];
01080 }
01081 if (beta == beta_zero) {
01082 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01083 } else {
01084 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01085 }
01086 }
01087 }
01088 } else {
01089
01090 for (j=izero; j<n; j++) {
01091 for (i=m-ione; i>-ione; i--) {
01092 temp1 = alpha*B[j*ldb + i];
01093 temp2 = C_zero;
01094 for (k=i+ione; k<m; k++) {
01095 C[j*ldc + k] += temp1*A[i*lda + k];
01096 temp2 += B[j*ldb + k]*A[i*lda + k];
01097 }
01098 if (beta == beta_zero) {
01099 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01100 } else {
01101 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01102 }
01103 }
01104 }
01105 }
01106 } else {
01107
01108 for (j=izero; j<n; j++) {
01109 temp1 = alpha*A[j*lda + j];
01110 if (beta == beta_zero) {
01111 for (i=izero; i<m; i++) {
01112 C[j*ldc + i] = temp1*B[j*ldb + i];
01113 }
01114 } else {
01115 for (i=izero; i<m; i++) {
01116 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01117 }
01118 }
01119 for (k=izero; k<j; k++) {
01120 if (Upper) {
01121 temp1 = alpha*A[j*lda + k];
01122 } else {
01123 temp1 = alpha*A[k*lda + j];
01124 }
01125 for (i=izero; i<m; i++) {
01126 C[j*ldc + i] += temp1*B[k*ldb + i];
01127 }
01128 }
01129 for (k=j+ione; k<n; k++) {
01130 if (Upper) {
01131 temp1 = alpha*A[k*lda + j];
01132 } else {
01133 temp1 = alpha*A[j*lda + k];
01134 }
01135 for (i=izero; i<m; i++) {
01136 C[j*ldc + i] += temp1*B[k*ldb + i];
01137 }
01138 }
01139 }
01140 }
01141 }
01142 }
01143
01144 template<typename OrdinalType, typename ScalarType>
01145 template <typename alpha_type, typename A_type>
01146 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01147 {
01148 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01149 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01150 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01151 A_type A_zero = ScalarTraits<A_type>::zero();
01152 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01153 ScalarType one = ScalarTraits<ScalarType>::one();
01154 OrdinalType i, j, k, NRowA = m;
01155 ScalarType temp;
01156 bool BadArgument = false;
01157 bool LSide = (ESideChar[side] == 'L');
01158 bool NoUnit = (EDiagChar[diag] == 'N');
01159 bool Upper = (EUploChar[uplo] == 'U');
01160
01161 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error,
01162 "Teuchos::BLAS::TRMM() does not currently support CONJ_TRANS for complex data types.");
01163
01164 if(!LSide) { NRowA = n; }
01165
01166
01167 if (n==izero || m==izero) { return; }
01168 if( m < 0 ) {
01169 std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01170 BadArgument = true; }
01171 if( n < 0 ) {
01172 std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01173 BadArgument = true; }
01174 if( lda < NRowA ) {
01175 std::cout << "BLAS::TRMM Error: LDA == "<< lda << std::endl;
01176 BadArgument = true; }
01177 if( ldb < m ) {
01178 std::cout << "BLAS::TRMM Error: M == "<< ldb << std::endl;
01179 BadArgument = true; }
01180
01181 if(!BadArgument) {
01182
01183
01184 if( alpha == alpha_zero ) {
01185 for( j=izero; j<n; j++ ) {
01186 for( i=izero; i<m; i++ ) {
01187 B[j*ldb + i] = B_zero;
01188 }
01189 }
01190 return;
01191 }
01192
01193
01194 if ( LSide ) {
01195
01196
01197 if ( ETranspChar[transa]=='N' ) {
01198
01199
01200 if ( Upper ) {
01201
01202 for( j=izero; j<n; j++ ) {
01203 for( k=izero; k<m; k++) {
01204 if ( B[j*ldb + k] != B_zero ) {
01205 temp = alpha*B[j*ldb + k];
01206 for( i=izero; i<k; i++ ) {
01207 B[j*ldb + i] += temp*A[k*lda + i];
01208 }
01209 if ( NoUnit )
01210 temp *=A[k*lda + k];
01211 B[j*ldb + k] = temp;
01212 }
01213 }
01214 }
01215 } else {
01216
01217 for( j=izero; j<n; j++ ) {
01218 for( k=m-ione; k>-ione; k-- ) {
01219 if( B[j*ldb + k] != B_zero ) {
01220 temp = alpha*B[j*ldb + k];
01221 B[j*ldb + k] = temp;
01222 if ( NoUnit )
01223 B[j*ldb + k] *= A[k*lda + k];
01224 for( i=k+ione; i<m; i++ ) {
01225 B[j*ldb + i] += temp*A[k*lda + i];
01226 }
01227 }
01228 }
01229 }
01230 }
01231 } else {
01232
01233 if( Upper ) {
01234 for( j=izero; j<n; j++ ) {
01235 for( i=m-ione; i>-ione; i-- ) {
01236 temp = B[j*ldb + i];
01237 if( NoUnit )
01238 temp *= A[i*lda + i];
01239 for( k=izero; k<i; k++ ) {
01240 temp += A[i*lda + k]*B[j*ldb + k];
01241 }
01242 B[j*ldb + i] = alpha*temp;
01243 }
01244 }
01245 } else {
01246 for( j=izero; j<n; j++ ) {
01247 for( i=izero; i<m; i++ ) {
01248 temp = B[j*ldb + i];
01249 if( NoUnit )
01250 temp *= A[i*lda + i];
01251 for( k=i+ione; k<m; k++ ) {
01252 temp += A[i*lda + k]*B[j*ldb + k];
01253 }
01254 B[j*ldb + i] = alpha*temp;
01255 }
01256 }
01257 }
01258 }
01259 } else {
01260
01261
01262 if( ETranspChar[transa] == 'N' ) {
01263
01264
01265 if( Upper ) {
01266
01267 for( j=n-ione; j>-ione; j-- ) {
01268 temp = alpha;
01269 if( NoUnit )
01270 temp *= A[j*lda + j];
01271 for( i=izero; i<m; i++ ) {
01272 B[j*ldb + i] *= temp;
01273 }
01274 for( k=izero; k<j; k++ ) {
01275 if( A[j*lda + k] != A_zero ) {
01276 temp = alpha*A[j*lda + k];
01277 for( i=izero; i<m; i++ ) {
01278 B[j*ldb + i] += temp*B[k*ldb + i];
01279 }
01280 }
01281 }
01282 }
01283 } else {
01284
01285 for( j=izero; j<n; j++ ) {
01286 temp = alpha;
01287 if( NoUnit )
01288 temp *= A[j*lda + j];
01289 for( i=izero; i<m; i++ ) {
01290 B[j*ldb + i] *= temp;
01291 }
01292 for( k=j+ione; k<n; k++ ) {
01293 if( A[j*lda + k] != A_zero ) {
01294 temp = alpha*A[j*lda + k];
01295 for( i=izero; i<m; i++ ) {
01296 B[j*ldb + i] += temp*B[k*ldb + i];
01297 }
01298 }
01299 }
01300 }
01301 }
01302 } else {
01303
01304
01305 if( Upper ) {
01306 for( k=izero; k<n; k++ ) {
01307 for( j=izero; j<k; j++ ) {
01308 if( A[k*lda + j] != A_zero ) {
01309 temp = alpha*A[k*lda + j];
01310 for( i=izero; i<m; i++ ) {
01311 B[j*ldb + i] += temp*B[k*ldb + i];
01312 }
01313 }
01314 }
01315 temp = alpha;
01316 if( NoUnit )
01317 temp *= A[k*lda + k];
01318 if( temp != one ) {
01319 for( i=izero; i<m; i++ ) {
01320 B[k*ldb + i] *= temp;
01321 }
01322 }
01323 }
01324 } else {
01325 for( k=n-ione; k>-ione; k-- ) {
01326 for( j=k+ione; j<n; j++ ) {
01327 if( A[k*lda + j] != A_zero ) {
01328 temp = alpha*A[k*lda + j];
01329 for( i=izero; i<m; i++ ) {
01330 B[j*ldb + i] += temp*B[k*ldb + i];
01331 }
01332 }
01333 }
01334 temp = alpha;
01335 if( NoUnit )
01336 temp *= A[k*lda + k];
01337 if( temp != one ) {
01338 for( i=izero; i<m; i++ ) {
01339 B[k*ldb + i] *= temp;
01340 }
01341 }
01342 }
01343 }
01344 }
01345 }
01346 }
01347 }
01348
01349 template<typename OrdinalType, typename ScalarType>
01350 template <typename alpha_type, typename A_type>
01351 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01352 {
01353 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01354 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01355 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01356 A_type A_zero = ScalarTraits<A_type>::zero();
01357 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01358 alpha_type alpha_one = ScalarTraits<alpha_type>::one();
01359 ScalarType B_one = ScalarTraits<ScalarType>::one();
01360 ScalarType temp;
01361 OrdinalType NRowA = m;
01362 bool BadArgument = false;
01363 bool NoUnit = (EDiagChar[diag]=='N');
01364
01365 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error,
01366 "Teuchos::BLAS::TRSM() does not currently support CONJ_TRANS for complex data types.");
01367
01368 if (!(ESideChar[side] == 'L')) { NRowA = n; }
01369
01370
01371 if (n == izero || m == izero) { return; }
01372 if( m < izero ) {
01373 std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01374 BadArgument = true; }
01375 if( n < izero ) {
01376 std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01377 BadArgument = true; }
01378 if( lda < NRowA ) {
01379 std::cout << "BLAS::TRSM Error: LDA == "<<lda<<std::endl;
01380 BadArgument = true; }
01381 if( ldb < m ) {
01382 std::cout << "BLAS::TRSM Error: LDB == "<<ldb<<std::endl;
01383 BadArgument = true; }
01384
01385 if(!BadArgument)
01386 {
01387 int i, j, k;
01388
01389 if(alpha == alpha_zero) {
01390 for(j = izero; j < n; j++) {
01391 for( i = izero; i < m; i++) {
01392 B[j*ldb+i] = B_zero;
01393 }
01394 }
01395 }
01396 else
01397 {
01398 if(ESideChar[side] == 'L') {
01399
01400
01401
01402 if(ETranspChar[transa] == 'N') {
01403
01404
01405
01406 if(EUploChar[uplo] == 'U') {
01407
01408 for(j = izero; j < n; j++) {
01409
01410 if(alpha != alpha_one) {
01411 for( i = izero; i < m; i++) {
01412 B[j*ldb+i] *= alpha;
01413 }
01414 }
01415
01416 for(k = (m - ione); k > -ione; k--) {
01417
01418 if (B[j*ldb + k] != B_zero) {
01419 if (NoUnit) {
01420 B[j*ldb + k] /= A[k*lda + k];
01421 }
01422 for(i = izero; i < k; i++) {
01423 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01424 }
01425 }
01426 }
01427 }
01428 }
01429 else
01430 {
01431 for(j = izero; j < n; j++) {
01432
01433 if(alpha != alpha_one) {
01434 for( i = izero; i < m; i++) {
01435 B[j*ldb+i] *= alpha;
01436 }
01437 }
01438
01439 for(k = izero; k < m; k++) {
01440
01441 if (B[j*ldb + k] != B_zero) {
01442 if (NoUnit) {
01443 B[j*ldb + k] /= A[k*lda + k];
01444 }
01445 for(i = k+ione; i < m; i++) {
01446 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01447 }
01448 }
01449 }
01450 }
01451 }
01452 }
01453 else {
01454
01455
01456
01457 if(EUploChar[uplo] == 'U') {
01458
01459 for(j = izero; j < n; j++) {
01460 for( i = izero; i < m; i++) {
01461 temp = alpha*B[j*ldb+i];
01462 for(k = izero; k < i; k++) {
01463 temp -= A[i*lda + k] * B[j*ldb + k];
01464 }
01465 if (NoUnit) {
01466 temp /= A[i*lda + i];
01467 }
01468 B[j*ldb + i] = temp;
01469 }
01470 }
01471 }
01472 else
01473 {
01474 for(j = izero; j < n; j++) {
01475 for(i = (m - ione) ; i > -ione; i--) {
01476 temp = alpha*B[j*ldb+i];
01477 for(k = i+ione; k < m; k++) {
01478 temp -= A[i*lda + k] * B[j*ldb + k];
01479 }
01480 if (NoUnit) {
01481 temp /= A[i*lda + i];
01482 }
01483 B[j*ldb + i] = temp;
01484 }
01485 }
01486 }
01487 }
01488 }
01489 else {
01490
01491
01492
01493
01494 if (ETranspChar[transa] == 'N') {
01495
01496
01497
01498 if(EUploChar[uplo] == 'U') {
01499
01500
01501 for(j = izero; j < n; j++) {
01502
01503 if(alpha != alpha_one) {
01504 for( i = izero; i < m; i++) {
01505 B[j*ldb+i] *= alpha;
01506 }
01507 }
01508 for(k = izero; k < j; k++) {
01509
01510 if (A[j*lda + k] != A_zero) {
01511 for(i = izero; i < m; i++) {
01512 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01513 }
01514 }
01515 }
01516 if (NoUnit) {
01517 temp = B_one/A[j*lda + j];
01518 for(i = izero; i < m; i++) {
01519 B[j*ldb + i] *= temp;
01520 }
01521 }
01522 }
01523 }
01524 else
01525 {
01526 for(j = (n - ione); j > -ione; j--) {
01527
01528 if(alpha != alpha_one) {
01529 for( i = izero; i < m; i++) {
01530 B[j*ldb+i] *= alpha;
01531 }
01532 }
01533
01534 for(k = j+ione; k < n; k++) {
01535
01536 if (A[j*lda + k] != A_zero) {
01537 for(i = izero; i < m; i++) {
01538 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01539 }
01540 }
01541 }
01542 if (NoUnit) {
01543 temp = B_one/A[j*lda + j];
01544 for(i = izero; i < m; i++) {
01545 B[j*ldb + i] *= temp;
01546 }
01547 }
01548 }
01549 }
01550 }
01551 else {
01552
01553
01554
01555 if(EUploChar[uplo] == 'U') {
01556
01557 for(k = (n - ione); k > -ione; k--) {
01558 if (NoUnit) {
01559 temp = B_one/A[k*lda + k];
01560 for(i = izero; i < m; i++) {
01561 B[k*ldb + i] *= temp;
01562 }
01563 }
01564 for(j = izero; j < k; j++) {
01565 if (A[k*lda + j] != A_zero) {
01566 temp = A[k*lda + j];
01567 for(i = izero; i < m; i++) {
01568 B[j*ldb + i] -= temp*B[k*ldb + i];
01569 }
01570 }
01571 }
01572 if (alpha != alpha_one) {
01573 for (i = izero; i < m; i++) {
01574 B[k*ldb + i] *= alpha;
01575 }
01576 }
01577 }
01578 }
01579 else
01580 {
01581 for(k = izero; k < n; k++) {
01582 if (NoUnit) {
01583 temp = B_one/A[k*lda + k];
01584 for (i = izero; i < m; i++) {
01585 B[k*ldb + i] *= temp;
01586 }
01587 }
01588 for(j = k+ione; j < n; j++) {
01589 if(A[k*lda + j] != A_zero) {
01590 temp = A[k*lda + j];
01591 for(i = izero; i < m; i++) {
01592 B[j*ldb + i] -= temp*B[k*ldb + i];
01593 }
01594 }
01595 }
01596 if (alpha != alpha_one) {
01597 for (i = izero; i < m; i++) {
01598 B[k*ldb + i] *= alpha;
01599 }
01600 }
01601 }
01602 }
01603 }
01604 }
01605 }
01606 }
01607 }
01608
01609
01610
01611 template <>
01612 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, float>
01613 {
01614 public:
01615 inline BLAS(void) {}
01616 inline BLAS(const BLAS<int, float>& ) {}
01617 inline virtual ~BLAS(void) {}
01618 void ROTG(float* da, float* db, float* c, float* s) const;
01619 void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
01620 float ASUM(const int n, const float* x, const int incx) const;
01621 void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
01622 void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
01623 float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
01624 float NRM2(const int n, const float* x, const int incx) const;
01625 void SCAL(const int n, const float alpha, float* x, const int incx) const;
01626 int IAMAX(const int n, const float* x, const int incx) const;
01627 void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
01628 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
01629 void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
01630 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
01631 void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
01632 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
01633 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
01634 };
01635
01636
01637
01638 template<>
01639 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, double>
01640 {
01641 public:
01642 inline BLAS(void) {}
01643 inline BLAS(const BLAS<int, double>& ) {}
01644 inline virtual ~BLAS(void) {}
01645 void ROTG(double* da, double* db, double* c, double* s) const;
01646 void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
01647 double ASUM(const int n, const double* x, const int incx) const;
01648 void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
01649 void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
01650 double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
01651 double NRM2(const int n, const double* x, const int incx) const;
01652 void SCAL(const int n, const double alpha, double* x, const int incx) const;
01653 int IAMAX(const int n, const double* x, const int incx) const;
01654 void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
01655 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
01656 void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
01657 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
01658 void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
01659 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
01660 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
01661 };
01662
01663
01664
01665 template<>
01666 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<float> >
01667 {
01668 public:
01669 inline BLAS(void) {}
01670 inline BLAS(const BLAS<int, std::complex<float> >& ) {}
01671 inline virtual ~BLAS(void) {}
01672 void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
01673 void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
01674 float ASUM(const int n, const std::complex<float>* x, const int incx) const;
01675 void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
01676 void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
01677 std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
01678 float NRM2(const int n, const std::complex<float>* x, const int incx) const;
01679 void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
01680 int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
01681 void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
01682 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
01683 void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
01684 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
01685 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
01686 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
01687 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
01688 };
01689
01690
01691
01692 template<>
01693 class TEUCHOS_LIB_DLL_EXPORT BLAS<int, std::complex<double> >
01694 {
01695 public:
01696 inline BLAS(void) {}
01697 inline BLAS(const BLAS<int, std::complex<double> >& ) {}
01698 inline virtual ~BLAS(void) {}
01699 void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
01700 void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
01701 double ASUM(const int n, const std::complex<double>* x, const int incx) const;
01702 void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
01703 void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
01704 std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
01705 double NRM2(const int n, const std::complex<double>* x, const int incx) const;
01706 void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
01707 int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
01708 void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
01709 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
01710 void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
01711 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
01712 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
01713 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
01714 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
01715 };
01716
01717 }
01718
01719 #endif // _TEUCHOS_BLAS_HPP_