00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046 #ifndef _TEUCHOS_BLAS_HPP_
00047 #define _TEUCHOS_BLAS_HPP_
00048
00056
00057
00058
00059
00060 #if defined (INTEL_CXML)
00061 unsigned int one=1;
00062 #endif
00063
00064 #ifdef CHAR_MACRO
00065 #undef CHAR_MACRO
00066 #endif
00067 #if defined (INTEL_CXML)
00068 #define CHAR_MACRO(char_var) &char_var, one
00069 #else
00070 #define CHAR_MACRO(char_var) &char_var
00071 #endif
00072
00073 #include "Teuchos_ConfigDefs.hpp"
00074 #include "Teuchos_ScalarTraits.hpp"
00075 #include "Teuchos_OrdinalTraits.hpp"
00076 #include "Teuchos_BLAS_types.hpp"
00077 #include "Teuchos_TestForException.hpp"
00078
00112 namespace Teuchos
00113 {
00114 extern const char ESideChar[];
00115 extern const char ETranspChar[];
00116 extern const char EUploChar[];
00117 extern const char EDiagChar[];
00118
00120
00125 template<typename OrdinalType, typename ScalarType>
00126 class DefaultBLASImpl
00127 {
00128
00129 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00130
00131 public:
00133
00134
00136 inline DefaultBLASImpl(void) {}
00137
00139 inline DefaultBLASImpl(const DefaultBLASImpl<OrdinalType, ScalarType>& ) {}
00140
00142 inline virtual ~DefaultBLASImpl(void) {}
00144
00146
00147
00149 void ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const;
00150
00152 void ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const;
00153
00155 void SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const;
00156
00158 void COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00159
00161 template <typename alpha_type, typename x_type>
00162 void AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const;
00163
00165 typename ScalarTraits<ScalarType>::magnitudeType ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00166
00168 template <typename x_type, typename y_type>
00169 ScalarType DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const;
00170
00172 typename ScalarTraits<ScalarType>::magnitudeType NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00173
00175 OrdinalType IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const;
00176
00178
00180
00181
00183 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00184 void GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A,
00185 const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const;
00186
00188 template <typename A_type>
00189 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A,
00190 const OrdinalType lda, ScalarType* x, const OrdinalType incx) const;
00191
00193 template <typename alpha_type, typename x_type, typename y_type>
00194 void GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx,
00195 const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const;
00197
00199
00200
00202 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00203 void GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00204
00206 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00207 void SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const;
00208
00210 template <typename alpha_type, typename A_type>
00211 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00212 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00213
00215 template <typename alpha_type, typename A_type>
00216 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n,
00217 const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const;
00219 };
00220
00221 template<typename OrdinalType, typename ScalarType>
00222 class BLAS : public DefaultBLASImpl<OrdinalType,ScalarType>
00223 {
00224
00225 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
00226
00227 public:
00229
00230
00232 inline BLAS(void) {}
00233
00235 inline BLAS(const BLAS<OrdinalType, ScalarType>& ) {}
00236
00238 inline virtual ~BLAS(void) {}
00240 };
00241
00242
00243
00244
00245
00246 template<typename OrdinalType, typename ScalarType>
00247 void DefaultBLASImpl<OrdinalType, ScalarType>::ROTG(ScalarType* da, ScalarType* db, MagnitudeType* c, ScalarType* s) const
00248 {
00249 ScalarType roe, alpha;
00250 ScalarType zero = ScalarTraits<ScalarType>::zero();
00251 ScalarType one = ScalarTraits<ScalarType>::one();
00252 MagnitudeType scale, norm;
00253 MagnitudeType m_one = ScalarTraits<MagnitudeType>::one();
00254 MagnitudeType m_zero = ScalarTraits<MagnitudeType>::zero();
00255
00256 roe = *db;
00257 if ( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ) { roe = *da; }
00258 scale = ScalarTraits<ScalarType>::magnitude( *da ) + ScalarTraits<ScalarType>::magnitude( *db );
00259 if ( scale == m_zero )
00260 {
00261 *c = m_one;
00262 *s = zero;
00263 *da = zero; *db = zero;
00264 }
00265 else if ( *da == zero )
00266 {
00267 *c = m_zero;
00268 *s = one;
00269 *da = *db; *db = zero;
00270 }
00271 else
00272 {
00273 norm = scale*ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::squareroot( ( *da/scale)*(*da/scale) + (*db/scale)*(*db/scale) ) );
00274 alpha = roe / ScalarTraits<ScalarType>::magnitude(roe);
00275 *c = ScalarTraits<ScalarType>::magnitude(*da) / norm;
00276 *s = alpha * ScalarTraits<ScalarType>::conjugate(*db) / norm;
00277 *db = one;
00278 if( ScalarTraits<ScalarType>::magnitude( *da ) > ScalarTraits<ScalarType>::magnitude( *db ) ){ *db = *s; }
00279 if( ScalarTraits<ScalarType>::magnitude( *db ) >= ScalarTraits<ScalarType>::magnitude( *da ) &&
00280 *c != ScalarTraits<MagnitudeType>::zero() ) { *db = one / *c; }
00281 *da = norm * alpha;
00282 }
00283 }
00284
00285 template<typename OrdinalType, typename ScalarType>
00286 void DefaultBLASImpl<OrdinalType,ScalarType>::ROT(const OrdinalType n, ScalarType* dx, const OrdinalType incx, ScalarType* dy, const OrdinalType incy, MagnitudeType* c, ScalarType* s) const
00287 {
00288
00289 }
00290
00291 template<typename OrdinalType, typename ScalarType>
00292 void DefaultBLASImpl<OrdinalType, ScalarType>::SCAL(const OrdinalType n, const ScalarType alpha, ScalarType* x, const OrdinalType incx) const
00293 {
00294 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00295 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00296 OrdinalType i, ix = izero;
00297 if ( n > izero ) {
00298
00299 if (incx < izero) { ix = (-n+ione)*incx; }
00300
00301 for(i = izero; i < n; i++)
00302 {
00303 x[ix] *= alpha;
00304 ix += incx;
00305 }
00306 }
00307 }
00308
00309 template<typename OrdinalType, typename ScalarType>
00310 void DefaultBLASImpl<OrdinalType, ScalarType>::COPY(const OrdinalType n, const ScalarType* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00311 {
00312 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00313 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00314 OrdinalType i, ix = izero, iy = izero;
00315 if ( n > izero ) {
00316
00317 if (incx < izero) { ix = (-n+ione)*incx; }
00318 if (incy < izero) { iy = (-n+ione)*incy; }
00319
00320 for(i = izero; i < n; i++)
00321 {
00322 y[iy] = x[ix];
00323 ix += incx;
00324 iy += incy;
00325 }
00326 }
00327 }
00328
00329 template<typename OrdinalType, typename ScalarType>
00330 template <typename alpha_type, typename x_type>
00331 void DefaultBLASImpl<OrdinalType, ScalarType>::AXPY(const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, ScalarType* y, const OrdinalType incy) const
00332 {
00333 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00334 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00335 OrdinalType i, ix = izero, iy = izero;
00336 if( n > izero && alpha != ScalarTraits<alpha_type>::zero())
00337 {
00338
00339 if (incx < izero) { ix = (-n+ione)*incx; }
00340 if (incy < izero) { iy = (-n+ione)*incy; }
00341
00342 for(i = izero; i < n; i++)
00343 {
00344 y[iy] += alpha * x[ix];
00345 ix += incx;
00346 iy += incy;
00347 }
00348 }
00349 }
00350
00351 template<typename OrdinalType, typename ScalarType>
00352 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::ASUM(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00353 {
00354 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00355 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00356 typename ScalarTraits<ScalarType>::magnitudeType result =
00357 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00358 OrdinalType i, ix = izero;
00359 if( n > izero ) {
00360
00361 if (incx < izero) { ix = (-n+ione)*incx; }
00362
00363 for(i = izero; i < n; i++)
00364 {
00365 result += ScalarTraits<ScalarType>::magnitude(x[ix]);
00366 ix += incx;
00367 }
00368 }
00369 return result;
00370 }
00371
00372 template<typename OrdinalType, typename ScalarType>
00373 template <typename x_type, typename y_type>
00374 ScalarType DefaultBLASImpl<OrdinalType, ScalarType>::DOT(const OrdinalType n, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy) const
00375 {
00376 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00377 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00378 ScalarType result = ScalarTraits<ScalarType>::zero();
00379 OrdinalType i, ix = izero, iy = izero;
00380 if( n > izero )
00381 {
00382
00383 if (incx < izero) { ix = (-n+ione)*incx; }
00384 if (incy < izero) { iy = (-n+ione)*incy; }
00385
00386 for(i = izero; i < n; i++)
00387 {
00388 result += ScalarTraits<x_type>::conjugate(x[ix]) * y[iy];
00389 ix += incx;
00390 iy += incy;
00391 }
00392 }
00393 return result;
00394 }
00395
00396 template<typename OrdinalType, typename ScalarType>
00397 typename ScalarTraits<ScalarType>::magnitudeType DefaultBLASImpl<OrdinalType, ScalarType>::NRM2(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00398 {
00399 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00400 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00401 typename ScalarTraits<ScalarType>::magnitudeType result =
00402 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00403 OrdinalType i, ix = izero;
00404 if ( n > izero )
00405 {
00406
00407 if (incx < izero) { ix = (-n+ione)*incx; }
00408
00409 for(i = izero; i < n; i++)
00410 {
00411 result += ScalarTraits<ScalarType>::magnitude(ScalarTraits<ScalarType>::conjugate(x[ix]) * x[ix]);
00412 ix += incx;
00413 }
00414 result = ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::squareroot(result);
00415 }
00416 return result;
00417 }
00418
00419 template<typename OrdinalType, typename ScalarType>
00420 OrdinalType DefaultBLASImpl<OrdinalType, ScalarType>::IAMAX(const OrdinalType n, const ScalarType* x, const OrdinalType incx) const
00421 {
00422 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00423 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00424 OrdinalType result = izero, ix = izero, i;
00425 typename ScalarTraits<ScalarType>::magnitudeType maxval =
00426 ScalarTraits<typename ScalarTraits<ScalarType>::magnitudeType>::zero();
00427
00428 if ( n > izero )
00429 {
00430 if (incx < izero) { ix = (-n+ione)*incx; }
00431 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00432 ix += incx;
00433 for(i = ione; i < n; i++)
00434 {
00435 if(ScalarTraits<ScalarType>::magnitude(x[ix]) > maxval)
00436 {
00437 result = i;
00438 maxval = ScalarTraits<ScalarType>::magnitude(x[ix]);
00439 }
00440 ix += incx;
00441 }
00442 }
00443 return result + 1;
00444 }
00445
00446
00447
00448
00449 template<typename OrdinalType, typename ScalarType>
00450 template <typename alpha_type, typename A_type, typename x_type, typename beta_type>
00451 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMV(ETransp trans, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const x_type* x, const OrdinalType incx, const beta_type beta, ScalarType* y, const OrdinalType incy) const
00452 {
00453 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00454 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00455 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00456 beta_type beta_zero = ScalarTraits<beta_type>::zero();
00457 x_type x_zero = ScalarTraits<x_type>::zero();
00458 ScalarType y_zero = ScalarTraits<ScalarType>::zero();
00459 beta_type beta_one = ScalarTraits<beta_type>::one();
00460 bool BadArgument = false;
00461
00462 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
00463 "Teuchos::BLAS::GEMV() does not currently support CONJ_TRANS for complex data types.");
00464
00465
00466 if( m == izero || n == izero || ( alpha == alpha_zero && beta == beta_one ) ){ return; }
00467
00468
00469 if( m < izero ) {
00470 std::cout << "BLAS::GEMV Error: M == " << m << std::endl;
00471 BadArgument = true;
00472 }
00473 if( n < izero ) {
00474 std::cout << "BLAS::GEMV Error: N == " << n << std::endl;
00475 BadArgument = true;
00476 }
00477 if( lda < m ) {
00478 std::cout << "BLAS::GEMV Error: LDA < MAX(1,M)"<< std::endl;
00479 BadArgument = true;
00480 }
00481 if( incx == izero ) {
00482 std::cout << "BLAS::GEMV Error: INCX == 0"<< std::endl;
00483 BadArgument = true;
00484 }
00485 if( incy == izero ) {
00486 std::cout << "BLAS::GEMV Error: INCY == 0"<< std::endl;
00487 BadArgument = true;
00488 }
00489
00490 if(!BadArgument) {
00491 OrdinalType i, j, lenx, leny, ix, iy, jx, jy;
00492 OrdinalType kx = izero, ky = izero;
00493 ScalarType temp;
00494
00495
00496 if(ETranspChar[trans] == 'N') {
00497 lenx = n;
00498 leny = m;
00499 } else {
00500 lenx = m;
00501 leny = n;
00502 }
00503
00504
00505 if (incx < izero ) { kx = (ione - lenx)*incx; }
00506 if (incy < izero ) { ky = (ione - leny)*incy; }
00507
00508
00509 ix = kx; iy = ky;
00510 if(beta != beta_one) {
00511 if (incy == ione) {
00512 if (beta == beta_zero) {
00513 for(i = izero; i < leny; i++) { y[i] = y_zero; }
00514 } else {
00515 for(i = izero; i < leny; i++) { y[i] *= beta; }
00516 }
00517 } else {
00518 if (beta == beta_zero) {
00519 for(i = izero; i < leny; i++) {
00520 y[iy] = y_zero;
00521 iy += incy;
00522 }
00523 } else {
00524 for(i = izero; i < leny; i++) {
00525 y[iy] *= beta;
00526 iy += incy;
00527 }
00528 }
00529 }
00530 }
00531
00532
00533 if(alpha == alpha_zero) { return; }
00534
00535 if( ETranspChar[trans] == 'N' ) {
00536
00537 jx = kx;
00538 if (incy == ione) {
00539 for(j = izero; j < n; j++) {
00540 if (x[jx] != x_zero) {
00541 temp = alpha*x[jx];
00542 for(i = izero; i < m; i++) {
00543 y[i] += temp*A[j*lda + i];
00544 }
00545 }
00546 jx += incx;
00547 }
00548 } else {
00549 for(j = izero; j < n; j++) {
00550 if (x[jx] != x_zero) {
00551 temp = alpha*x[jx];
00552 iy = ky;
00553 for(i = izero; i < m; i++) {
00554 y[iy] += temp*A[j*lda + i];
00555 iy += incy;
00556 }
00557 }
00558 jx += incx;
00559 }
00560 }
00561 } else {
00562 jy = ky;
00563 if (incx == ione) {
00564 for(j = izero; j < n; j++) {
00565 temp = y_zero;
00566 for(i = izero; i < m; i++) {
00567 temp += A[j*lda + i]*x[i];
00568 }
00569 y[jy] += alpha*temp;
00570 jy += incy;
00571 }
00572 } else {
00573 for(j = izero; j < n; j++) {
00574 temp = y_zero;
00575 ix = kx;
00576 for (i = izero; i < m; i++) {
00577 temp += A[j*lda + i]*x[ix];
00578 ix += incx;
00579 }
00580 y[jy] += alpha*temp;
00581 jy += incy;
00582 }
00583 }
00584 }
00585 }
00586 }
00587
00588 template<typename OrdinalType, typename ScalarType>
00589 template <typename A_type>
00590 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMV(EUplo uplo, ETransp trans, EDiag diag, const OrdinalType n, const A_type* A, const OrdinalType lda, ScalarType* x, const OrdinalType incx) const
00591 {
00592 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00593 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00594 ScalarType zero = ScalarTraits<ScalarType>::zero();
00595 bool BadArgument = false;
00596
00597 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && trans == CONJ_TRANS, std::logic_error,
00598 "Teuchos::BLAS::TRMV() does not currently support CONJ_TRANS for complex data types.");
00599
00600
00601 if( n == izero ){ return; }
00602
00603
00604 if( n < izero ) {
00605 std::cout << "BLAS::TRMV Error: N == " << n << std::endl;
00606 BadArgument = true;
00607 }
00608 if( lda < n ) {
00609 std::cout << "BLAS::TRMV Error: LDA < MAX(1,N)"<< std::endl;
00610 BadArgument = true;
00611 }
00612 if( incx == izero ) {
00613 std::cout << "BLAS::TRMV Error: INCX == 0"<< std::endl;
00614 BadArgument = true;
00615 }
00616
00617 if(!BadArgument) {
00618 OrdinalType i, j, ix, jx, kx = izero;
00619 ScalarType temp;
00620 bool NoUnit = (EDiagChar[diag] == 'N');
00621
00622
00623 if (incx < izero) { kx = (-n+ione)*incx; }
00624
00625
00626 if (ETranspChar[trans] == 'N') {
00627
00628 if (EUploChar[uplo] == 'U') {
00629
00630 if (incx == ione) {
00631 for (j=izero; j<n; j++) {
00632 if (x[j] != zero) {
00633 temp = x[j];
00634 for (i=izero; i<j; i++) {
00635 x[i] += temp*A[j*lda + i];
00636 }
00637 if (NoUnit)
00638 x[j] *= A[j*lda + j];
00639 }
00640 }
00641 } else {
00642 jx = kx;
00643 for (j=izero; j<n; j++) {
00644 if (x[jx] != zero) {
00645 temp = x[jx];
00646 ix = kx;
00647 for (i=izero; i<j; i++) {
00648 x[ix] += temp*A[j*lda + i];
00649 ix += incx;
00650 }
00651 if (NoUnit)
00652 x[jx] *= A[j*lda + j];
00653 }
00654 jx += incx;
00655 }
00656 }
00657 } else {
00658 if (incx == ione) {
00659 for (j=n-ione; j>-ione; j--) {
00660 if (x[j] != zero) {
00661 temp = x[j];
00662 for (i=n-ione; i>j; i--) {
00663 x[i] += temp*A[j*lda + i];
00664 }
00665 if (NoUnit)
00666 x[j] *= A[j*lda + j];
00667 }
00668 }
00669 } else {
00670 kx += (n-ione)*incx;
00671 jx = kx;
00672 for (j=n-ione; j>-ione; j--) {
00673 if (x[jx] != zero) {
00674 temp = x[jx];
00675 ix = kx;
00676 for (i=n-ione; i>j; i--) {
00677 x[ix] += temp*A[j*lda + i];
00678 ix -= incx;
00679 }
00680 if (NoUnit)
00681 x[jx] *= A[j*lda + j];
00682 }
00683 jx -= incx;
00684 }
00685 }
00686 }
00687 } else {
00688
00689 if (EUploChar[uplo]=='U') {
00690
00691 if (incx == ione) {
00692 for (j=n-ione; j>-ione; j--) {
00693 temp = x[j];
00694 if (NoUnit)
00695 temp *= A[j*lda + j];
00696 for (i=j-ione; i>-ione; i--) {
00697 temp += A[j*lda + i]*x[i];
00698 }
00699 x[j] = temp;
00700 }
00701 } else {
00702 jx = kx + (n-ione)*incx;
00703 for (j=n-ione; j>-ione; j--) {
00704 temp = x[jx];
00705 ix = jx;
00706 if (NoUnit)
00707 temp *= A[j*lda + j];
00708 for (i=j-ione; i>-ione; i--) {
00709 ix -= incx;
00710 temp += A[j*lda + i]*x[ix];
00711 }
00712 x[jx] = temp;
00713 jx -= incx;
00714 }
00715 }
00716 } else {
00717
00718 if (incx == ione) {
00719 for (j=izero; j<n; j++) {
00720 temp = x[j];
00721 if (NoUnit)
00722 temp *= A[j*lda + j];
00723 for (i=j+ione; i<n; i++) {
00724 temp += A[j*lda + i]*x[i];
00725 }
00726 x[j] = temp;
00727 }
00728 } else {
00729 jx = kx;
00730 for (j=izero; j<n; j++) {
00731 temp = x[jx];
00732 ix = jx;
00733 if (NoUnit)
00734 temp *= A[j*lda + j];
00735 for (i=j+ione; i<n; i++) {
00736 ix += incx;
00737 temp += A[j*lda + i]*x[ix];
00738 }
00739 x[jx] = temp;
00740 jx += incx;
00741 }
00742 }
00743 }
00744 }
00745 }
00746 }
00747
00748 template<typename OrdinalType, typename ScalarType>
00749 template <typename alpha_type, typename x_type, typename y_type>
00750 void DefaultBLASImpl<OrdinalType, ScalarType>::GER(const OrdinalType m, const OrdinalType n, const alpha_type alpha, const x_type* x, const OrdinalType incx, const y_type* y, const OrdinalType incy, ScalarType* A, const OrdinalType lda) const
00751 {
00752 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00753 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00754 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00755 y_type y_zero = ScalarTraits<y_type>::zero();
00756 bool BadArgument = false;
00757
00758 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
00759 "Teuchos::BLAS::GER() does not currently support complex data types.");
00760
00761
00762 if( m == izero || n == izero || alpha == alpha_zero ){ return; }
00763
00764
00765 if( m < izero ) {
00766 std::cout << "BLAS::GER Error: M == " << m << std::endl;
00767 BadArgument = true;
00768 }
00769 if( n < izero ) {
00770 std::cout << "BLAS::GER Error: N == " << n << std::endl;
00771 BadArgument = true;
00772 }
00773 if( lda < m ) {
00774 std::cout << "BLAS::GER Error: LDA < MAX(1,M)"<< std::endl;
00775 BadArgument = true;
00776 }
00777 if( incx == 0 ) {
00778 std::cout << "BLAS::GER Error: INCX == 0"<< std::endl;
00779 BadArgument = true;
00780 }
00781 if( incy == 0 ) {
00782 std::cout << "BLAS::GER Error: INCY == 0"<< std::endl;
00783 BadArgument = true;
00784 }
00785
00786 if(!BadArgument) {
00787 OrdinalType i, j, ix, jy = izero, kx = izero;
00788 ScalarType temp;
00789
00790
00791 if (incx < izero) { kx = (-m+ione)*incx; }
00792 if (incy < izero) { jy = (-n+ione)*incy; }
00793
00794
00795 if( incx == ione ) {
00796 for( j=izero; j<n; j++ ) {
00797 if ( y[jy] != y_zero ) {
00798 temp = alpha*y[jy];
00799 for ( i=izero; i<m; i++ ) {
00800 A[j*lda + i] += x[i]*temp;
00801 }
00802 }
00803 jy += incy;
00804 }
00805 }
00806 else {
00807 for( j=izero; j<n; j++ ) {
00808 if ( y[jy] != y_zero ) {
00809 temp = alpha*y[jy];
00810 ix = kx;
00811 for( i=izero; i<m; i++ ) {
00812 A[j*lda + i] += x[ix]*temp;
00813 ix += incx;
00814 }
00815 }
00816 jy += incy;
00817 }
00818 }
00819 }
00820 }
00821
00822
00823
00824
00825
00826 template<typename OrdinalType, typename ScalarType>
00827 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00828 void DefaultBLASImpl<OrdinalType, ScalarType>::GEMM(ETransp transa, ETransp transb, const OrdinalType m, const OrdinalType n, const OrdinalType k, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
00829 {
00830
00831 typedef TypeNameTraits<OrdinalType> OTNT;
00832 typedef TypeNameTraits<ScalarType> STNT;
00833
00834 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00835 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00836 beta_type beta_zero = ScalarTraits<beta_type>::zero();
00837 B_type B_zero = ScalarTraits<B_type>::zero();
00838 ScalarType C_zero = ScalarTraits<ScalarType>::zero();
00839 beta_type beta_one = ScalarTraits<beta_type>::one();
00840 OrdinalType i, j, p;
00841 OrdinalType NRowA = m, NRowB = k;
00842 ScalarType temp;
00843 bool BadArgument = false;
00844
00845 TEST_FOR_EXCEPTION(
00846 Teuchos::ScalarTraits<ScalarType>::isComplex
00847 && (transa == CONJ_TRANS || transb == CONJ_TRANS),
00848 std::logic_error,
00849 "Teuchos::BLAS<"<<OTNT::name()<<","<<STNT::name()<<">::GEMM()"
00850 " does not currently support CONJ_TRANS for complex data types."
00851 );
00852
00853
00854 if( !(ETranspChar[transa]=='N') ) {
00855 NRowA = k;
00856 }
00857 if( !(ETranspChar[transb]=='N') ) {
00858 NRowB = n;
00859 }
00860
00861
00862 if( (m==izero) || (n==izero) || (((alpha==alpha_zero)||(k==izero)) && (beta==beta_one)) ){ return; }
00863 if( m < izero ) {
00864 std::cout << "BLAS::GEMM Error: M == " << m << std::endl;
00865 BadArgument = true;
00866 }
00867 if( n < izero ) {
00868 std::cout << "BLAS::GEMM Error: N == " << n << std::endl;
00869 BadArgument = true;
00870 }
00871 if( k < izero ) {
00872 std::cout << "BLAS::GEMM Error: K == " << k << std::endl;
00873 BadArgument = true;
00874 }
00875 if( lda < NRowA ) {
00876 std::cout << "BLAS::GEMM Error: LDA < MAX(1,M)"<< std::endl;
00877 BadArgument = true;
00878 }
00879 if( ldb < NRowB ) {
00880 std::cout << "BLAS::GEMM Error: LDB < MAX(1,K)"<< std::endl;
00881 BadArgument = true;
00882 }
00883 if( ldc < m ) {
00884 std::cout << "BLAS::GEMM Error: LDC < MAX(1,M)"<< std::endl;
00885 BadArgument = true;
00886 }
00887
00888 if(!BadArgument) {
00889
00890
00891 if( alpha == alpha_zero ) {
00892 if( beta == beta_zero ) {
00893 for (j=izero; j<n; j++) {
00894 for (i=izero; i<m; i++) {
00895 C[j*ldc + i] = C_zero;
00896 }
00897 }
00898 } else {
00899 for (j=izero; j<n; j++) {
00900 for (i=izero; i<m; i++) {
00901 C[j*ldc + i] *= beta;
00902 }
00903 }
00904 }
00905 return;
00906 }
00907
00908
00909
00910 if ( ETranspChar[transb]=='N' ) {
00911 if ( ETranspChar[transa]=='N' ) {
00912
00913 for (j=izero; j<n; j++) {
00914 if( beta == beta_zero ) {
00915 for (i=izero; i<m; i++){
00916 C[j*ldc + i] = C_zero;
00917 }
00918 } else if( beta != beta_one ) {
00919 for (i=izero; i<m; i++){
00920 C[j*ldc + i] *= beta;
00921 }
00922 }
00923 for (p=izero; p<k; p++){
00924 if (B[j*ldb + p] != B_zero ){
00925 temp = alpha*B[j*ldb + p];
00926 for (i=izero; i<m; i++) {
00927 C[j*ldc + i] += temp*A[p*lda + i];
00928 }
00929 }
00930 }
00931 }
00932 } else {
00933
00934 for (j=izero; j<n; j++) {
00935 for (i=izero; i<m; i++) {
00936 temp = C_zero;
00937 for (p=izero; p<k; p++) {
00938 temp += A[i*lda + p]*B[j*ldb + p];
00939 }
00940 if (beta == beta_zero) {
00941 C[j*ldc + i] = alpha*temp;
00942 } else {
00943 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00944 }
00945 }
00946 }
00947 }
00948 } else {
00949 if ( ETranspChar[transa]=='N' ) {
00950
00951 for (j=izero; j<n; j++) {
00952 if (beta == beta_zero) {
00953 for (i=izero; i<m; i++) {
00954 C[j*ldc + i] = C_zero;
00955 }
00956 } else if ( beta != beta_one ) {
00957 for (i=izero; i<m; i++) {
00958 C[j*ldc + i] *= beta;
00959 }
00960 }
00961 for (p=izero; p<k; p++) {
00962 if (B[p*ldb + j] != B_zero) {
00963 temp = alpha*B[p*ldb + j];
00964 for (i=izero; i<m; i++) {
00965 C[j*ldc + i] += temp*A[p*lda + i];
00966 }
00967 }
00968 }
00969 }
00970 } else {
00971
00972 for (j=izero; j<n; j++) {
00973 for (i=izero; i<m; i++) {
00974 temp = C_zero;
00975 for (p=izero; p<k; p++) {
00976 temp += A[i*lda + p]*B[p*ldb + j];
00977 }
00978 if (beta == beta_zero) {
00979 C[j*ldc + i] = alpha*temp;
00980 } else {
00981 C[j*ldc + i] = alpha*temp + beta*C[j*ldc + i];
00982 }
00983 }
00984 }
00985 }
00986 }
00987 }
00988 }
00989
00990
00991 template<typename OrdinalType, typename ScalarType>
00992 template <typename alpha_type, typename A_type, typename B_type, typename beta_type>
00993 void DefaultBLASImpl<OrdinalType, ScalarType>::SYMM(ESide side, EUplo uplo, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, const B_type* B, const OrdinalType ldb, const beta_type beta, ScalarType* C, const OrdinalType ldc) const
00994 {
00995 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
00996 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
00997 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
00998 beta_type beta_zero = ScalarTraits<beta_type>::zero();
00999 ScalarType C_zero = ScalarTraits<ScalarType>::zero();
01000 beta_type beta_one = ScalarTraits<beta_type>::one();
01001 OrdinalType i, j, k, NRowA = m;
01002 ScalarType temp1, temp2;
01003 bool BadArgument = false;
01004 bool Upper = (EUploChar[uplo] == 'U');
01005 if (ESideChar[side] == 'R') { NRowA = n; }
01006
01007 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex, std::logic_error,
01008 "Teuchos::BLAS::SYMM() does not currently support complex data types.");
01009
01010
01011 if ( (m==izero) || (n==izero) || ( (alpha==alpha_zero)&&(beta==beta_one) ) ) { return; }
01012 if( m < 0 ) {
01013 std::cout << "BLAS::SYMM Error: M == "<< m << std::endl;
01014 BadArgument = true; }
01015 if( n < 0 ) {
01016 std::cout << "BLAS::SYMM Error: N == "<< n << std::endl;
01017 BadArgument = true; }
01018 if( lda < NRowA ) {
01019 std::cout << "BLAS::SYMM Error: LDA == "<<lda<<std::endl;
01020 BadArgument = true; }
01021 if( ldb < m ) {
01022 std::cout << "BLAS::SYMM Error: LDB == "<<ldb<<std::endl;
01023 BadArgument = true; }
01024 if( ldc < m ) {
01025 std::cout << "BLAS::SYMM Error: LDC == "<<ldc<<std::endl;
01026 BadArgument = true; }
01027
01028 if(!BadArgument) {
01029
01030
01031 if (alpha == alpha_zero) {
01032 if (beta == beta_zero ) {
01033 for (j=izero; j<n; j++) {
01034 for (i=izero; i<m; i++) {
01035 C[j*ldc + i] = C_zero;
01036 }
01037 }
01038 } else {
01039 for (j=izero; j<n; j++) {
01040 for (i=izero; i<m; i++) {
01041 C[j*ldc + i] *= beta;
01042 }
01043 }
01044 }
01045 return;
01046 }
01047
01048 if ( ESideChar[side] == 'L') {
01049
01050
01051 if (Upper) {
01052
01053 for (j=izero; j<n; j++) {
01054 for (i=izero; i<m; i++) {
01055 temp1 = alpha*B[j*ldb + i];
01056 temp2 = C_zero;
01057 for (k=izero; k<i; k++) {
01058 C[j*ldc + k] += temp1*A[i*lda + k];
01059 temp2 += B[j*ldb + k]*A[i*lda + k];
01060 }
01061 if (beta == beta_zero) {
01062 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01063 } else {
01064 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01065 }
01066 }
01067 }
01068 } else {
01069
01070 for (j=izero; j<n; j++) {
01071 for (i=m-ione; i>-ione; i--) {
01072 temp1 = alpha*B[j*ldb + i];
01073 temp2 = C_zero;
01074 for (k=i+ione; k<m; k++) {
01075 C[j*ldc + k] += temp1*A[i*lda + k];
01076 temp2 += B[j*ldb + k]*A[i*lda + k];
01077 }
01078 if (beta == beta_zero) {
01079 C[j*ldc + i] = temp1*A[i*lda + i] + alpha*temp2;
01080 } else {
01081 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*A[i*lda + i] + alpha*temp2;
01082 }
01083 }
01084 }
01085 }
01086 } else {
01087
01088 for (j=izero; j<n; j++) {
01089 temp1 = alpha*A[j*lda + j];
01090 if (beta == beta_zero) {
01091 for (i=izero; i<m; i++) {
01092 C[j*ldc + i] = temp1*B[j*ldb + i];
01093 }
01094 } else {
01095 for (i=izero; i<m; i++) {
01096 C[j*ldc + i] = beta*C[j*ldc + i] + temp1*B[j*ldb + i];
01097 }
01098 }
01099 for (k=izero; k<j; k++) {
01100 if (Upper) {
01101 temp1 = alpha*A[j*lda + k];
01102 } else {
01103 temp1 = alpha*A[k*lda + j];
01104 }
01105 for (i=izero; i<m; i++) {
01106 C[j*ldc + i] += temp1*B[k*ldb + i];
01107 }
01108 }
01109 for (k=j+ione; k<n; k++) {
01110 if (Upper) {
01111 temp1 = alpha*A[k*lda + j];
01112 } else {
01113 temp1 = alpha*A[j*lda + k];
01114 }
01115 for (i=izero; i<m; i++) {
01116 C[j*ldc + i] += temp1*B[k*ldb + i];
01117 }
01118 }
01119 }
01120 }
01121 }
01122 }
01123
01124 template<typename OrdinalType, typename ScalarType>
01125 template <typename alpha_type, typename A_type>
01126 void DefaultBLASImpl<OrdinalType, ScalarType>::TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01127 {
01128 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01129 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01130 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01131 A_type A_zero = ScalarTraits<A_type>::zero();
01132 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01133 ScalarType one = ScalarTraits<ScalarType>::one();
01134 OrdinalType i, j, k, NRowA = m;
01135 ScalarType temp;
01136 bool BadArgument = false;
01137 bool LSide = (ESideChar[side] == 'L');
01138 bool NoUnit = (EDiagChar[diag] == 'N');
01139 bool Upper = (EUploChar[uplo] == 'U');
01140
01141 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error,
01142 "Teuchos::BLAS::TRMM() does not currently support CONJ_TRANS for complex data types.");
01143
01144 if(!LSide) { NRowA = n; }
01145
01146
01147 if (n==izero || m==izero) { return; }
01148 if( m < 0 ) {
01149 std::cout << "BLAS::TRMM Error: M == "<< m <<std::endl;
01150 BadArgument = true; }
01151 if( n < 0 ) {
01152 std::cout << "BLAS::TRMM Error: N == "<< n <<std::endl;
01153 BadArgument = true; }
01154 if( lda < NRowA ) {
01155 std::cout << "BLAS::TRMM Error: LDA == "<< lda << std::endl;
01156 BadArgument = true; }
01157 if( ldb < m ) {
01158 std::cout << "BLAS::TRMM Error: M == "<< ldb << std::endl;
01159 BadArgument = true; }
01160
01161 if(!BadArgument) {
01162
01163
01164 if( alpha == alpha_zero ) {
01165 for( j=izero; j<n; j++ ) {
01166 for( i=izero; i<m; i++ ) {
01167 B[j*ldb + i] = B_zero;
01168 }
01169 }
01170 return;
01171 }
01172
01173
01174 if ( LSide ) {
01175
01176
01177 if ( ETranspChar[transa]=='N' ) {
01178
01179
01180 if ( Upper ) {
01181
01182 for( j=izero; j<n; j++ ) {
01183 for( k=izero; k<m; k++) {
01184 if ( B[j*ldb + k] != B_zero ) {
01185 temp = alpha*B[j*ldb + k];
01186 for( i=izero; i<k; i++ ) {
01187 B[j*ldb + i] += temp*A[k*lda + i];
01188 }
01189 if ( NoUnit )
01190 temp *=A[k*lda + k];
01191 B[j*ldb + k] = temp;
01192 }
01193 }
01194 }
01195 } else {
01196
01197 for( j=izero; j<n; j++ ) {
01198 for( k=m-ione; k>-ione; k-- ) {
01199 if( B[j*ldb + k] != B_zero ) {
01200 temp = alpha*B[j*ldb + k];
01201 B[j*ldb + k] = temp;
01202 if ( NoUnit )
01203 B[j*ldb + k] *= A[k*lda + k];
01204 for( i=k+ione; i<m; i++ ) {
01205 B[j*ldb + i] += temp*A[k*lda + i];
01206 }
01207 }
01208 }
01209 }
01210 }
01211 } else {
01212
01213 if( Upper ) {
01214 for( j=izero; j<n; j++ ) {
01215 for( i=m-ione; i>-ione; i-- ) {
01216 temp = B[j*ldb + i];
01217 if( NoUnit )
01218 temp *= A[i*lda + i];
01219 for( k=izero; k<i; k++ ) {
01220 temp += A[i*lda + k]*B[j*ldb + k];
01221 }
01222 B[j*ldb + i] = alpha*temp;
01223 }
01224 }
01225 } else {
01226 for( j=izero; j<n; j++ ) {
01227 for( i=izero; i<m; i++ ) {
01228 temp = B[j*ldb + i];
01229 if( NoUnit )
01230 temp *= A[i*lda + i];
01231 for( k=i+ione; k<m; k++ ) {
01232 temp += A[i*lda + k]*B[j*ldb + k];
01233 }
01234 B[j*ldb + i] = alpha*temp;
01235 }
01236 }
01237 }
01238 }
01239 } else {
01240
01241
01242 if( ETranspChar[transa] == 'N' ) {
01243
01244
01245 if( Upper ) {
01246
01247 for( j=n-ione; j>-ione; j-- ) {
01248 temp = alpha;
01249 if( NoUnit )
01250 temp *= A[j*lda + j];
01251 for( i=izero; i<m; i++ ) {
01252 B[j*ldb + i] *= temp;
01253 }
01254 for( k=izero; k<j; k++ ) {
01255 if( A[j*lda + k] != A_zero ) {
01256 temp = alpha*A[j*lda + k];
01257 for( i=izero; i<m; i++ ) {
01258 B[j*ldb + i] += temp*B[k*ldb + i];
01259 }
01260 }
01261 }
01262 }
01263 } else {
01264
01265 for( j=izero; j<n; j++ ) {
01266 temp = alpha;
01267 if( NoUnit )
01268 temp *= A[j*lda + j];
01269 for( i=izero; i<m; i++ ) {
01270 B[j*ldb + i] *= temp;
01271 }
01272 for( k=j+ione; k<n; k++ ) {
01273 if( A[j*lda + k] != A_zero ) {
01274 temp = alpha*A[j*lda + k];
01275 for( i=izero; i<m; i++ ) {
01276 B[j*ldb + i] += temp*B[k*ldb + i];
01277 }
01278 }
01279 }
01280 }
01281 }
01282 } else {
01283
01284
01285 if( Upper ) {
01286 for( k=izero; k<n; k++ ) {
01287 for( j=izero; j<k; j++ ) {
01288 if( A[k*lda + j] != A_zero ) {
01289 temp = alpha*A[k*lda + j];
01290 for( i=izero; i<m; i++ ) {
01291 B[j*ldb + i] += temp*B[k*ldb + i];
01292 }
01293 }
01294 }
01295 temp = alpha;
01296 if( NoUnit )
01297 temp *= A[k*lda + k];
01298 if( temp != one ) {
01299 for( i=izero; i<m; i++ ) {
01300 B[k*ldb + i] *= temp;
01301 }
01302 }
01303 }
01304 } else {
01305 for( k=n-ione; k>-ione; k-- ) {
01306 for( j=k+ione; j<n; j++ ) {
01307 if( A[k*lda + j] != A_zero ) {
01308 temp = alpha*A[k*lda + j];
01309 for( i=izero; i<m; i++ ) {
01310 B[j*ldb + i] += temp*B[k*ldb + i];
01311 }
01312 }
01313 }
01314 temp = alpha;
01315 if( NoUnit )
01316 temp *= A[k*lda + k];
01317 if( temp != one ) {
01318 for( i=izero; i<m; i++ ) {
01319 B[k*ldb + i] *= temp;
01320 }
01321 }
01322 }
01323 }
01324 }
01325 }
01326 }
01327 }
01328
01329 template<typename OrdinalType, typename ScalarType>
01330 template <typename alpha_type, typename A_type>
01331 void DefaultBLASImpl<OrdinalType, ScalarType>::TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type alpha, const A_type* A, const OrdinalType lda, ScalarType* B, const OrdinalType ldb) const
01332 {
01333 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
01334 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
01335 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
01336 A_type A_zero = ScalarTraits<A_type>::zero();
01337 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
01338 alpha_type alpha_one = ScalarTraits<alpha_type>::one();
01339 ScalarType B_one = ScalarTraits<ScalarType>::one();
01340 ScalarType temp;
01341 OrdinalType NRowA = m;
01342 bool BadArgument = false;
01343 bool NoUnit = (EDiagChar[diag]=='N');
01344
01345 TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<ScalarType>::isComplex && transa == CONJ_TRANS, std::logic_error,
01346 "Teuchos::BLAS::TRSM() does not currently support CONJ_TRANS for complex data types.");
01347
01348 if (!(ESideChar[side] == 'L')) { NRowA = n; }
01349
01350
01351 if (n == izero || m == izero) { return; }
01352 if( m < izero ) {
01353 std::cout << "BLAS::TRSM Error: M == "<<m<<std::endl;
01354 BadArgument = true; }
01355 if( n < izero ) {
01356 std::cout << "BLAS::TRSM Error: N == "<<n<<std::endl;
01357 BadArgument = true; }
01358 if( lda < NRowA ) {
01359 std::cout << "BLAS::TRSM Error: LDA == "<<lda<<std::endl;
01360 BadArgument = true; }
01361 if( ldb < m ) {
01362 std::cout << "BLAS::TRSM Error: LDB == "<<ldb<<std::endl;
01363 BadArgument = true; }
01364
01365 if(!BadArgument)
01366 {
01367 int i, j, k;
01368
01369 if(alpha == alpha_zero) {
01370 for(j = izero; j < n; j++) {
01371 for( i = izero; i < m; i++) {
01372 B[j*ldb+i] = B_zero;
01373 }
01374 }
01375 }
01376 else
01377 {
01378 if(ESideChar[side] == 'L') {
01379
01380
01381
01382 if(ETranspChar[transa] == 'N') {
01383
01384
01385
01386 if(EUploChar[uplo] == 'U') {
01387
01388 for(j = izero; j < n; j++) {
01389
01390 if(alpha != alpha_one) {
01391 for( i = izero; i < m; i++) {
01392 B[j*ldb+i] *= alpha;
01393 }
01394 }
01395
01396 for(k = (m - ione); k > -ione; k--) {
01397
01398 if (B[j*ldb + k] != B_zero) {
01399 if (NoUnit) {
01400 B[j*ldb + k] /= A[k*lda + k];
01401 }
01402 for(i = izero; i < k; i++) {
01403 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01404 }
01405 }
01406 }
01407 }
01408 }
01409 else
01410 {
01411 for(j = izero; j < n; j++) {
01412
01413 if(alpha != alpha_one) {
01414 for( i = izero; i < m; i++) {
01415 B[j*ldb+i] *= alpha;
01416 }
01417 }
01418
01419 for(k = izero; k < m; k++) {
01420
01421 if (B[j*ldb + k] != B_zero) {
01422 if (NoUnit) {
01423 B[j*ldb + k] /= A[k*lda + k];
01424 }
01425 for(i = k+ione; i < m; i++) {
01426 B[j*ldb + i] -= B[j*ldb + k] * A[k*lda + i];
01427 }
01428 }
01429 }
01430 }
01431 }
01432 }
01433 else {
01434
01435
01436
01437 if(EUploChar[uplo] == 'U') {
01438
01439 for(j = izero; j < n; j++) {
01440 for( i = izero; i < m; i++) {
01441 temp = alpha*B[j*ldb+i];
01442 for(k = izero; k < i; k++) {
01443 temp -= A[i*lda + k] * B[j*ldb + k];
01444 }
01445 if (NoUnit) {
01446 temp /= A[i*lda + i];
01447 }
01448 B[j*ldb + i] = temp;
01449 }
01450 }
01451 }
01452 else
01453 {
01454 for(j = izero; j < n; j++) {
01455 for(i = (m - ione) ; i > -ione; i--) {
01456 temp = alpha*B[j*ldb+i];
01457 for(k = i+ione; k < m; k++) {
01458 temp -= A[i*lda + k] * B[j*ldb + k];
01459 }
01460 if (NoUnit) {
01461 temp /= A[i*lda + i];
01462 }
01463 B[j*ldb + i] = temp;
01464 }
01465 }
01466 }
01467 }
01468 }
01469 else {
01470
01471
01472
01473
01474 if (ETranspChar[transa] == 'N') {
01475
01476
01477
01478 if(EUploChar[uplo] == 'U') {
01479
01480
01481 for(j = izero; j < n; j++) {
01482
01483 if(alpha != alpha_one) {
01484 for( i = izero; i < m; i++) {
01485 B[j*ldb+i] *= alpha;
01486 }
01487 }
01488 for(k = izero; k < j; k++) {
01489
01490 if (A[j*lda + k] != A_zero) {
01491 for(i = izero; i < m; i++) {
01492 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01493 }
01494 }
01495 }
01496 if (NoUnit) {
01497 temp = B_one/A[j*lda + j];
01498 for(i = izero; i < m; i++) {
01499 B[j*ldb + i] *= temp;
01500 }
01501 }
01502 }
01503 }
01504 else
01505 {
01506 for(j = (n - ione); j > -ione; j--) {
01507
01508 if(alpha != alpha_one) {
01509 for( i = izero; i < m; i++) {
01510 B[j*ldb+i] *= alpha;
01511 }
01512 }
01513
01514 for(k = j+ione; k < n; k++) {
01515
01516 if (A[j*lda + k] != A_zero) {
01517 for(i = izero; i < m; i++) {
01518 B[j*ldb + i] -= A[j*lda + k] * B[k*ldb + i];
01519 }
01520 }
01521 }
01522 if (NoUnit) {
01523 temp = B_one/A[j*lda + j];
01524 for(i = izero; i < m; i++) {
01525 B[j*ldb + i] *= temp;
01526 }
01527 }
01528 }
01529 }
01530 }
01531 else {
01532
01533
01534
01535 if(EUploChar[uplo] == 'U') {
01536
01537 for(k = (n - ione); k > -ione; k--) {
01538 if (NoUnit) {
01539 temp = B_one/A[k*lda + k];
01540 for(i = izero; i < m; i++) {
01541 B[k*ldb + i] *= temp;
01542 }
01543 }
01544 for(j = izero; j < k; j++) {
01545 if (A[k*lda + j] != A_zero) {
01546 temp = A[k*lda + j];
01547 for(i = izero; i < m; i++) {
01548 B[j*ldb + i] -= temp*B[k*ldb + i];
01549 }
01550 }
01551 }
01552 if (alpha != alpha_one) {
01553 for (i = izero; i < m; i++) {
01554 B[k*ldb + i] *= alpha;
01555 }
01556 }
01557 }
01558 }
01559 else
01560 {
01561 for(k = izero; k < n; k++) {
01562 if (NoUnit) {
01563 temp = B_one/A[k*lda + k];
01564 for (i = izero; i < m; i++) {
01565 B[k*ldb + i] *= temp;
01566 }
01567 }
01568 for(j = k+ione; j < n; j++) {
01569 if(A[k*lda + j] != A_zero) {
01570 temp = A[k*lda + j];
01571 for(i = izero; i < m; i++) {
01572 B[j*ldb + i] -= temp*B[k*ldb + i];
01573 }
01574 }
01575 }
01576 if (alpha != alpha_one) {
01577 for (i = izero; i < m; i++) {
01578 B[k*ldb + i] *= alpha;
01579 }
01580 }
01581 }
01582 }
01583 }
01584 }
01585 }
01586 }
01587 }
01588
01589
01590
01591 template <>
01592 class BLAS<int, float>
01593 {
01594 public:
01595 inline BLAS(void) {}
01596 inline BLAS(const BLAS<int, float>& ) {}
01597 inline virtual ~BLAS(void) {}
01598 void ROTG(float* da, float* db, float* c, float* s) const;
01599 void ROT(const int n, float* dx, const int incx, float* dy, const int incy, float* c, float* s) const;
01600 float ASUM(const int n, const float* x, const int incx) const;
01601 void AXPY(const int n, const float alpha, const float* x, const int incx, float* y, const int incy) const;
01602 void COPY(const int n, const float* x, const int incx, float* y, const int incy) const;
01603 float DOT(const int n, const float* x, const int incx, const float* y, const int incy) const;
01604 float NRM2(const int n, const float* x, const int incx) const;
01605 void SCAL(const int n, const float alpha, float* x, const int incx) const;
01606 int IAMAX(const int n, const float* x, const int incx) const;
01607 void GEMV(ETransp trans, const int m, const int n, const float alpha, const float* A, const int lda, const float* x, const int incx, const float beta, float* y, const int incy) const;
01608 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const float* A, const int lda, float* x, const int incx) const;
01609 void GER(const int m, const int n, const float alpha, const float* x, const int incx, const float* y, const int incy, float* A, const int lda) const;
01610 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const float alpha, const float* A, const int lda, const float* B, const int ldb, const float beta, float* C, const int ldc) const;
01611 void SYMM(ESide side, EUplo uplo, const int m, const int n, const float alpha, const float* A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc) const;
01612 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
01613 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const float alpha, const float* A, const int lda, float* B, const int ldb) const;
01614 };
01615
01616
01617
01618 template<>
01619 class BLAS<int, double>
01620 {
01621 public:
01622 inline BLAS(void) {}
01623 inline BLAS(const BLAS<int, double>& ) {}
01624 inline virtual ~BLAS(void) {}
01625 void ROTG(double* da, double* db, double* c, double* s) const;
01626 void ROT(const int n, double* dx, const int incx, double* dy, const int incy, double* c, double* s) const;
01627 double ASUM(const int n, const double* x, const int incx) const;
01628 void AXPY(const int n, const double alpha, const double* x, const int incx, double* y, const int incy) const;
01629 void COPY(const int n, const double* x, const int incx, double* y, const int incy) const;
01630 double DOT(const int n, const double* x, const int incx, const double* y, const int incy) const;
01631 double NRM2(const int n, const double* x, const int incx) const;
01632 void SCAL(const int n, const double alpha, double* x, const int incx) const;
01633 int IAMAX(const int n, const double* x, const int incx) const;
01634 void GEMV(ETransp trans, const int m, const int n, const double alpha, const double* A, const int lda, const double* x, const int incx, const double beta, double* y, const int incy) const;
01635 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const double* A, const int lda, double* x, const int incx) const;
01636 void GER(const int m, const int n, const double alpha, const double* x, const int incx, const double* y, const int incy, double* A, const int lda) const;
01637 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const double alpha, const double* A, const int lda, const double* B, const int ldb, const double beta, double* C, const int ldc) const;
01638 void SYMM(ESide side, EUplo uplo, const int m, const int n, const double alpha, const double* A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) const;
01639 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
01640 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const double alpha, const double* A, const int lda, double* B, const int ldb) const;
01641 };
01642
01643
01644
01645 template<>
01646 class BLAS<int, std::complex<float> >
01647 {
01648 public:
01649 inline BLAS(void) {}
01650 inline BLAS(const BLAS<int, std::complex<float> >& ) {}
01651 inline virtual ~BLAS(void) {}
01652 void ROTG(std::complex<float>* da, std::complex<float>* db, float* c, std::complex<float>* s) const;
01653 void ROT(const int n, std::complex<float>* dx, const int incx, std::complex<float>* dy, const int incy, float* c, std::complex<float>* s) const;
01654 float ASUM(const int n, const std::complex<float>* x, const int incx) const;
01655 void AXPY(const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
01656 void COPY(const int n, const std::complex<float>* x, const int incx, std::complex<float>* y, const int incy) const;
01657 std::complex<float> DOT(const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy) const;
01658 float NRM2(const int n, const std::complex<float>* x, const int incx) const;
01659 void SCAL(const int n, const std::complex<float> alpha, std::complex<float>* x, const int incx) const;
01660 int IAMAX(const int n, const std::complex<float>* x, const int incx) const;
01661 void GEMV(ETransp trans, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* x, const int incx, const std::complex<float> beta, std::complex<float>* y, const int incy) const;
01662 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<float>* A, const int lda, std::complex<float>* x, const int incx) const;
01663 void GER(const int m, const int n, const std::complex<float> alpha, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy, std::complex<float>* A, const int lda) const;
01664 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float>* B, const int ldb, const std::complex<float> beta, std::complex<float>* C, const int ldc) const;
01665 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, const std::complex<float> *B, const int ldb, const std::complex<float> beta, std::complex<float> *C, const int ldc) const;
01666 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
01667 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<float> alpha, const std::complex<float>* A, const int lda, std::complex<float>* B, const int ldb) const;
01668 };
01669
01670
01671
01672 template<>
01673 class BLAS<int, std::complex<double> >
01674 {
01675 public:
01676 inline BLAS(void) {}
01677 inline BLAS(const BLAS<int, std::complex<double> >& ) {}
01678 inline virtual ~BLAS(void) {}
01679 void ROTG(std::complex<double>* da, std::complex<double>* db, double* c, std::complex<double>* s) const;
01680 void ROT(const int n, std::complex<double>* dx, const int incx, std::complex<double>* dy, const int incy, double* c, std::complex<double>* s) const;
01681 double ASUM(const int n, const std::complex<double>* x, const int incx) const;
01682 void AXPY(const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
01683 void COPY(const int n, const std::complex<double>* x, const int incx, std::complex<double>* y, const int incy) const;
01684 std::complex<double> DOT(const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy) const;
01685 double NRM2(const int n, const std::complex<double>* x, const int incx) const;
01686 void SCAL(const int n, const std::complex<double> alpha, std::complex<double>* x, const int incx) const;
01687 int IAMAX(const int n, const std::complex<double>* x, const int incx) const;
01688 void GEMV(ETransp trans, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* x, const int incx, const std::complex<double> beta, std::complex<double>* y, const int incy) const;
01689 void TRMV(EUplo uplo, ETransp trans, EDiag diag, const int n, const std::complex<double>* A, const int lda, std::complex<double>* x, const int incx) const;
01690 void GER(const int m, const int n, const std::complex<double> alpha, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy, std::complex<double>* A, const int lda) const;
01691 void GEMM(ETransp transa, ETransp transb, const int m, const int n, const int k, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double>* B, const int ldb, const std::complex<double> beta, std::complex<double>* C, const int ldc) const;
01692 void SYMM(ESide side, EUplo uplo, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, const std::complex<double> *B, const int ldb, const std::complex<double> beta, std::complex<double> *C, const int ldc) const;
01693 void TRMM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
01694 void TRSM(ESide side, EUplo uplo, ETransp transa, EDiag diag, const int m, const int n, const std::complex<double> alpha, const std::complex<double>* A, const int lda, std::complex<double>* B, const int ldb) const;
01695 };
01696
01697 }
01698
01699 #endif // _TEUCHOS_BLAS_HPP_