Sacado Package Browser (Single Doxygen Collection) Version of the Day
FadBLASUnitTests.hpp
Go to the documentation of this file.
00001 // $Id$ 
00002 // $Source$ 
00003 // @HEADER
00004 // ***********************************************************************
00005 // 
00006 //                           Sacado Package
00007 //                 Copyright (2006) Sandia Corporation
00008 // 
00009 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00010 // the U.S. Government retains certain rights in this software.
00011 // 
00012 // This library is free software; you can redistribute it and/or modify
00013 // it under the terms of the GNU Lesser General Public License as
00014 // published by the Free Software Foundation; either version 2.1 of the
00015 // License, or (at your option) any later version.
00016 //  
00017 // This library is distributed in the hope that it will be useful, but
00018 // WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00020 // Lesser General Public License for more details.
00021 //  
00022 // You should have received a copy of the GNU Lesser General Public
00023 // License along with this library; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00025 // USA
00026 // Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
00027 // (etphipp@sandia.gov).
00028 // 
00029 // ***********************************************************************
00030 // @HEADER
00031 
00032 #ifndef FADBLASUNITTESTS_HPP
00033 #define FADBLASUNITTESTS_HPP
00034 
00035 // Sacado includes
00036 #include "Sacado.hpp"
00037 #include "Sacado_Fad_BLAS.hpp"
00038 #include "Sacado_Random.hpp"
00039 
00040 // Cppunit includes
00041 #include <cppunit/extensions/HelperMacros.h>
00042 
00043 #define COMPARE_VALUES(a, b) \
00044   CPPUNIT_ASSERT( std::abs(a-b) < this->tol_a + this->tol_r*std::abs(a) );
00045 
00046 #define COMPARE_FADS(a, b)                              \
00047 CPPUNIT_ASSERT(a.size() == b.size());     \
00048 CPPUNIT_ASSERT(a.hasFastAccess() == b.hasFastAccess()); \
00049 COMPARE_VALUES(a.val(), b.val());     \
00050 for (int k=0; k<a.size(); k++) {      \
00051   COMPARE_VALUES(a.dx(k), b.dx(k));     \
00052   COMPARE_VALUES(a.fastAccessDx(k), b.fastAccessDx(k)); \
00053  }              \
00054  ;
00055 
00056 #define COMPARE_FAD_VECTORS(X1, X2, n)    \
00057   CPPUNIT_ASSERT(X1.size() == std::size_t(n));  \
00058   CPPUNIT_ASSERT(X2.size() == std::size_t(n));  \
00059   for (unsigned int i=0; i<n; i++) {    \
00060     COMPARE_FADS(X1[i], X2[i]);     \
00061   }           \
00062   ;
00063 
00064 // A class for testing differentiated BLAS operations for general Fad types
00065 template <class FadType, class ScalarType>
00066 class FadBLASUnitTests : public CppUnit::TestFixture {
00067 
00068   typedef Sacado::Fad::Vector<unsigned int,FadType> VectorType;
00069 
00070   CPPUNIT_TEST_SUITE( FadBLASUnitTests );
00071 
00072   CPPUNIT_TEST(testSCAL1);
00073   CPPUNIT_TEST(testSCAL2);
00074   CPPUNIT_TEST(testSCAL3);
00075   CPPUNIT_TEST(testSCAL4);
00076 
00077   CPPUNIT_TEST(testCOPY1);
00078   CPPUNIT_TEST(testCOPY2);
00079   CPPUNIT_TEST(testCOPY3);
00080   CPPUNIT_TEST(testCOPY4);
00081 
00082   CPPUNIT_TEST(testAXPY1);
00083   CPPUNIT_TEST(testAXPY2);
00084   CPPUNIT_TEST(testAXPY3);
00085   CPPUNIT_TEST(testAXPY4);
00086 
00087   CPPUNIT_TEST(testDOT1);
00088   CPPUNIT_TEST(testDOT2);
00089   CPPUNIT_TEST(testDOT3);
00090   CPPUNIT_TEST(testDOT4);
00091 
00092   CPPUNIT_TEST(testNRM21);
00093   CPPUNIT_TEST(testNRM22);
00094   
00095   CPPUNIT_TEST(testGEMV1);
00096   CPPUNIT_TEST(testGEMV2);
00097   CPPUNIT_TEST(testGEMV3);
00098   CPPUNIT_TEST(testGEMV4);
00099   CPPUNIT_TEST(testGEMV5);
00100   CPPUNIT_TEST(testGEMV6);
00101   CPPUNIT_TEST(testGEMV7);
00102   CPPUNIT_TEST(testGEMV8);
00103   CPPUNIT_TEST(testGEMV9);
00104 
00105   CPPUNIT_TEST(testTRMV1);
00106   CPPUNIT_TEST(testTRMV2);
00107   CPPUNIT_TEST(testTRMV3);
00108   CPPUNIT_TEST(testTRMV4);
00109 
00110   CPPUNIT_TEST(testGER1);
00111   CPPUNIT_TEST(testGER2);
00112   CPPUNIT_TEST(testGER3);
00113   CPPUNIT_TEST(testGER4);
00114   CPPUNIT_TEST(testGER5);
00115   CPPUNIT_TEST(testGER6);
00116   CPPUNIT_TEST(testGER7);
00117 
00118   CPPUNIT_TEST(testGEMM1);
00119   CPPUNIT_TEST(testGEMM2);
00120   CPPUNIT_TEST(testGEMM3);
00121   CPPUNIT_TEST(testGEMM4);
00122   CPPUNIT_TEST(testGEMM5);
00123   CPPUNIT_TEST(testGEMM6);
00124   CPPUNIT_TEST(testGEMM7);
00125   CPPUNIT_TEST(testGEMM8);
00126   CPPUNIT_TEST(testGEMM9);
00127   CPPUNIT_TEST(testGEMM10);
00128 
00129   CPPUNIT_TEST(testSYMM1);
00130   CPPUNIT_TEST(testSYMM2);
00131   CPPUNIT_TEST(testSYMM3);
00132   CPPUNIT_TEST(testSYMM4);
00133   CPPUNIT_TEST(testSYMM5);
00134   CPPUNIT_TEST(testSYMM6);
00135   CPPUNIT_TEST(testSYMM7);
00136   CPPUNIT_TEST(testSYMM8);
00137   CPPUNIT_TEST(testSYMM9);
00138 
00139   CPPUNIT_TEST(testTRMM1);
00140   CPPUNIT_TEST(testTRMM2);
00141   CPPUNIT_TEST(testTRMM3);
00142   CPPUNIT_TEST(testTRMM4);
00143   CPPUNIT_TEST(testTRMM5);
00144   CPPUNIT_TEST(testTRMM6);
00145   CPPUNIT_TEST(testTRMM7);
00146 
00147   CPPUNIT_TEST(testTRSM1);
00148   CPPUNIT_TEST(testTRSM2);
00149   CPPUNIT_TEST(testTRSM3);
00150   CPPUNIT_TEST(testTRSM4);
00151   CPPUNIT_TEST(testTRSM5);
00152   CPPUNIT_TEST(testTRSM6);
00153   CPPUNIT_TEST(testTRSM7);
00154 
00155   CPPUNIT_TEST_SUITE_END();
00156 
00157 public:
00158 
00159   FadBLASUnitTests();
00160 
00161   FadBLASUnitTests(int m, int n, int l, int ndot, 
00162        double absolute_tolerance, double relative_tolerance);
00163 
00164   void setUp();
00165   void tearDown();
00166 
00167   void testSCAL1();
00168   void testSCAL2();
00169   void testSCAL3();
00170   void testSCAL4();
00171 
00172   void testCOPY1();
00173   void testCOPY2();
00174   void testCOPY3();
00175   void testCOPY4();
00176 
00177   void testAXPY1();
00178   void testAXPY2();
00179   void testAXPY3();
00180   void testAXPY4();
00181 
00182   void testDOT1();
00183   void testDOT2();
00184   void testDOT3();
00185   void testDOT4();
00186 
00187   void testNRM21();
00188   void testNRM22();
00189 
00190   void testGEMV1();
00191   void testGEMV2();
00192   void testGEMV3();
00193   void testGEMV4();
00194   void testGEMV5();
00195   void testGEMV6();
00196   void testGEMV7();
00197   void testGEMV8();
00198   void testGEMV9();
00199 
00200   void testTRMV1();
00201   void testTRMV2();
00202   void testTRMV3();
00203   void testTRMV4();
00204 
00205   void testGER1();
00206   void testGER2();
00207   void testGER3();
00208   void testGER4();
00209   void testGER5();
00210   void testGER6();
00211   void testGER7();
00212 
00213   void testGEMM1();
00214   void testGEMM2();
00215   void testGEMM3();
00216   void testGEMM4();
00217   void testGEMM5();
00218   void testGEMM6();
00219   void testGEMM7();
00220   void testGEMM8();
00221   void testGEMM9();
00222   void testGEMM10();
00223 
00224   void testSYMM1();
00225   void testSYMM2();
00226   void testSYMM3();
00227   void testSYMM4();
00228   void testSYMM5();
00229   void testSYMM6();
00230   void testSYMM7();
00231   void testSYMM8();
00232   void testSYMM9();
00233 
00234   void testTRMM1();
00235   void testTRMM2();
00236   void testTRMM3();
00237   void testTRMM4();
00238   void testTRMM5();
00239   void testTRMM6();
00240   void testTRMM7();
00241 
00242   void testTRSM1();
00243   void testTRSM2();
00244   void testTRSM3();
00245   void testTRSM4();
00246   void testTRSM5();
00247   void testTRSM6();
00248   void testTRSM7();
00249 
00250 protected:
00251 
00252   // Random number generator
00253   Sacado::Random<ScalarType> urand;
00254 
00255   // Real random number generator for derivative components
00256   Sacado::Random<double> real_urand;
00257 
00258   // Number of matrix rows
00259   unsigned int m;
00260 
00261   // Number of matrix columns
00262   unsigned int n;
00263 
00264   // Number of matrix columns for level 3 blas
00265   unsigned int l;
00266 
00267   // Number of derivative components
00268   unsigned int ndot;
00269 
00270   // Tolerances to which fad objects should be the same
00271   double tol_a, tol_r;
00272 
00273 }; // class FadBLASUnitTests
00274 
00275 template <class FadType, class ScalarType>
00276 FadBLASUnitTests<FadType,ScalarType>::
00277 FadBLASUnitTests() :
00278   urand(), real_urand(), m(5), n(6), l(4), ndot(7), tol_a(1.0e-11), tol_r(1.0e-11) {}
00279 
00280 template <class FadType, class ScalarType>
00281 FadBLASUnitTests<FadType,ScalarType>::
00282 FadBLASUnitTests(int m_, int n_, int l_, int ndot_, double absolute_tolerance, 
00283      double relative_tolerance) :
00284   urand(),
00285   real_urand(),
00286   m(m_),
00287   n(n_),
00288   l(l_),
00289   ndot(ndot_), 
00290   tol_a(absolute_tolerance), 
00291   tol_r(relative_tolerance) {}
00292 
00293 template <class FadType, class ScalarType>
00294 void FadBLASUnitTests<FadType,ScalarType>::
00295 setUp() {}
00296 
00297 template <class FadType, class ScalarType>
00298 void FadBLASUnitTests<FadType,ScalarType>::
00299 tearDown() {}
00300 
00301 // Tests all arguments
00302 template <class FadType, class ScalarType>
00303 void
00304 FadBLASUnitTests<FadType,ScalarType>::
00305 testSCAL1() {
00306   VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
00307   for (unsigned int i=0; i<m; i++) {
00308     ScalarType val = urand.number();
00309     x1[i] = FadType(ndot, val);
00310     x2[i] = FadType(ndot, val);
00311     x3[i] = FadType(ndot, val);
00312     for (unsigned int k=0; k<ndot; k++) {
00313       val = urand.number();
00314       x1[i].fastAccessDx(k) = val;
00315       x2[i].fastAccessDx(k) = val;
00316       x3[i].fastAccessDx(k) = val;
00317     }
00318   }
00319   FadType alpha(ndot, urand.number());
00320   for (unsigned int k=0; k<ndot; k++) {
00321     alpha.fastAccessDx(k) = urand.number();
00322   }
00323   
00324   Teuchos::BLAS<int,FadType> teuchos_blas;
00325   teuchos_blas.SCAL(m, alpha, &x1[0], 1);
00326 
00327   Teuchos::BLAS<int,FadType> sacado_blas(false);
00328   sacado_blas.SCAL(m, alpha, &x2[0], 1);
00329 
00330   COMPARE_FAD_VECTORS(x1, x2, m);
00331 
00332   unsigned int sz = m*(1+ndot);
00333   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00334   sacado_blas2.SCAL(m, alpha, &x3[0], 1);
00335 
00336   COMPARE_FAD_VECTORS(x1, x3, m);
00337 }
00338 
00339 // Tests non-unit inc
00340 template <class FadType, class ScalarType>
00341 void
00342 FadBLASUnitTests<FadType,ScalarType>::
00343 testSCAL2() {
00344   unsigned int incx = 2;
00345   VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
00346   for (unsigned int i=0; i<m*incx; i++) {
00347     ScalarType val = urand.number();
00348     x1[i] = FadType(ndot, val);
00349     x2[i] = FadType(ndot, val);
00350     x3[i] = FadType(ndot, val);
00351     for (unsigned int k=0; k<ndot; k++) {
00352       val = urand.number();
00353       x1[i].fastAccessDx(k) = val;
00354       x2[i].fastAccessDx(k) = val;
00355       x3[i].fastAccessDx(k) = val;
00356     }
00357   }
00358   FadType alpha(ndot, urand.number());
00359   for (unsigned int k=0; k<ndot; k++) {
00360     alpha.fastAccessDx(k) = urand.number();
00361   }
00362   
00363   Teuchos::BLAS<int,FadType> teuchos_blas;
00364   teuchos_blas.SCAL(m, alpha, &x1[0], incx);
00365 
00366   Teuchos::BLAS<int,FadType> sacado_blas(false);
00367   sacado_blas.SCAL(m, alpha, &x2[0], incx);
00368 
00369   COMPARE_FAD_VECTORS(x1, x2, m*incx);
00370 
00371   unsigned int sz = m*(1+ndot);
00372   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00373   sacado_blas2.SCAL(m, alpha, &x3[0], incx);
00374 
00375   COMPARE_FAD_VECTORS(x1, x3, m*incx);
00376 }
00377 
00378 // Tests constant alpha
00379 template <class FadType, class ScalarType>
00380 void
00381 FadBLASUnitTests<FadType,ScalarType>::
00382 testSCAL3() {
00383   VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
00384   for (unsigned int i=0; i<m; i++) {
00385     ScalarType val = urand.number();
00386     x1[i] = FadType(ndot, val);
00387     x2[i] = FadType(ndot, val);
00388     x3[i] = FadType(ndot, val);
00389     for (unsigned int k=0; k<ndot; k++) {
00390       val = urand.number();
00391       x1[i].fastAccessDx(k) = val;
00392       x2[i].fastAccessDx(k) = val;
00393       x3[i].fastAccessDx(k) = val;
00394     }
00395   }
00396   ScalarType alpha = urand.number();
00397 
00398   Teuchos::BLAS<int,FadType> teuchos_blas;
00399   teuchos_blas.SCAL(m, alpha, &x1[0], 1);
00400 
00401   Teuchos::BLAS<int,FadType> sacado_blas(false);
00402   sacado_blas.SCAL(m, alpha, &x2[0], 1);
00403 
00404   COMPARE_FAD_VECTORS(x1, x2, m);
00405 
00406   unsigned int sz = m*(1+ndot);
00407   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00408   sacado_blas2.SCAL(m, alpha, &x3[0], 1);
00409 
00410   COMPARE_FAD_VECTORS(x1, x3, m);
00411 }
00412 
00413 // Tests constant x
00414 template <class FadType, class ScalarType>
00415 void
00416 FadBLASUnitTests<FadType,ScalarType>::
00417 testSCAL4() {
00418   VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
00419   for (unsigned int i=0; i<m; i++) {
00420     ScalarType val = urand.number();
00421     x1[i] = val;
00422     x2[i] = val;
00423     x3[i] = val;
00424   }
00425   FadType alpha = FadType(ndot, urand.number());
00426   for (unsigned int k=0; k<ndot; k++)
00427     alpha.fastAccessDx(k) = urand.number();
00428 
00429   Teuchos::BLAS<int,FadType> teuchos_blas;
00430   teuchos_blas.SCAL(m, alpha, &x1[0], 1);
00431 
00432   Teuchos::BLAS<int,FadType> sacado_blas(false);
00433   sacado_blas.SCAL(m, alpha, &x2[0], 1);
00434 
00435   COMPARE_FAD_VECTORS(x1, x2, m);
00436 
00437   unsigned int sz = m*(1+ndot);
00438   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00439   sacado_blas2.SCAL(m, alpha, &x3[0], 1);
00440 
00441   COMPARE_FAD_VECTORS(x1, x3, m);
00442 }
00443 
00444 // Tests all arguments
00445 template <class FadType, class ScalarType>
00446 void
00447 FadBLASUnitTests<FadType,ScalarType>::
00448 testCOPY1() {
00449   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
00450   for (unsigned int i=0; i<m; i++) {
00451     x[i] = FadType(ndot, urand.number());
00452     ScalarType val = urand.number();
00453     y1[i] = FadType(ndot, val);
00454     y2[i] = FadType(ndot, val);
00455     y3[i] = FadType(ndot, val);
00456     for (unsigned int k=0; k<ndot; k++) {
00457       x[i].fastAccessDx(k) = urand.number();
00458       val = urand.number();
00459       y1[i].fastAccessDx(k) = val;
00460       y2[i].fastAccessDx(k) = val;
00461       y3[i].fastAccessDx(k) = val;
00462     }
00463   }
00464   
00465   Teuchos::BLAS<int,FadType> teuchos_blas;
00466   teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
00467 
00468   Teuchos::BLAS<int,FadType> sacado_blas(false);
00469   sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
00470 
00471   COMPARE_FAD_VECTORS(y1, y2, m);
00472 
00473   unsigned int sz = 2*m*(1+ndot);
00474   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00475   sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
00476 
00477   COMPARE_FAD_VECTORS(y1, y3, m);
00478 }
00479 
00480 // Tests non unit inc
00481 template <class FadType, class ScalarType>
00482 void
00483 FadBLASUnitTests<FadType,ScalarType>::
00484 testCOPY2() {
00485   unsigned int incx = 2;
00486   unsigned int incy = 3;
00487   VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
00488   for (unsigned int i=0; i<m*incx; i++) {
00489     x[i] = FadType(ndot, urand.number());
00490     for (unsigned int k=0; k<ndot; k++) {
00491       x[i].fastAccessDx(k) = urand.number();
00492     }
00493   }
00494   for (unsigned int i=0; i<m*incy; i++) {
00495     ScalarType val = urand.number();
00496     y1[i] = FadType(ndot, val);
00497     y2[i] = FadType(ndot, val);
00498     y3[i] = FadType(ndot, val);
00499     for (unsigned int k=0; k<ndot; k++) {
00500       val = urand.number();
00501       y1[i].fastAccessDx(k) = val;
00502       y2[i].fastAccessDx(k) = val;
00503       y3[i].fastAccessDx(k) = val;
00504     }
00505   }
00506   
00507   Teuchos::BLAS<int,FadType> teuchos_blas;
00508   teuchos_blas.COPY(m, &x[0], incx, &y1[0], incy);
00509 
00510   Teuchos::BLAS<int,FadType> sacado_blas(false);
00511   sacado_blas.COPY(m, &x[0], incx, &y2[0], incy);
00512 
00513   COMPARE_FAD_VECTORS(y1, y2, m*incy);
00514 
00515   unsigned int sz = 2*m*(1+ndot);
00516   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00517   sacado_blas2.COPY(m, &x[0], incx, &y3[0], incy);
00518 
00519   COMPARE_FAD_VECTORS(y1, y3, m*incy);
00520 }
00521 
00522 // Tests x constant
00523 template <class FadType, class ScalarType>
00524 void
00525 FadBLASUnitTests<FadType,ScalarType>::
00526 testCOPY3() {
00527   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
00528   for (unsigned int i=0; i<m; i++) {
00529     x[i] = urand.number();
00530   }
00531   for (unsigned int i=0; i<m; i++) {
00532     ScalarType val = urand.number();
00533     y1[i] = FadType(ndot, val);
00534     y2[i] = FadType(ndot, val);
00535     y3[i] = FadType(ndot, val);
00536     for (unsigned int k=0; k<ndot; k++) {
00537       val = urand.number();
00538       y1[i].fastAccessDx(k) = val;
00539       y2[i].fastAccessDx(k) = val;
00540       y3[i].fastAccessDx(k) = val;
00541     }
00542   }
00543   
00544   Teuchos::BLAS<int,FadType> teuchos_blas;
00545   teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
00546 
00547   Teuchos::BLAS<int,FadType> sacado_blas(false);
00548   sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
00549 
00550   COMPARE_FAD_VECTORS(y1, y2, m);
00551 
00552   unsigned int sz = 2*m*(1+ndot);
00553   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00554   sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
00555 
00556   COMPARE_FAD_VECTORS(y1, y3, m);
00557 }
00558 
00559 // Tests y constant
00560 template <class FadType, class ScalarType>
00561 void
00562 FadBLASUnitTests<FadType,ScalarType>::
00563 testCOPY4() {
00564   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
00565   for (unsigned int i=0; i<m; i++) {
00566     x[i] = FadType(ndot, urand.number());
00567     ScalarType val = urand.number();
00568     y1[i] = val;
00569     y2[i] = val;
00570     y3[i] = val;
00571     for (unsigned int k=0; k<ndot; k++) {
00572       x[i].fastAccessDx(k) = urand.number();
00573     }
00574   }
00575   
00576   Teuchos::BLAS<int,FadType> teuchos_blas;
00577   teuchos_blas.COPY(m, &x[0], 1, &y1[0], 1);
00578 
00579   Teuchos::BLAS<int,FadType> sacado_blas(false);
00580   sacado_blas.COPY(m, &x[0], 1, &y2[0], 1);
00581 
00582   COMPARE_FAD_VECTORS(y1, y2, m);
00583 
00584   unsigned int sz = 2*m*(1+ndot);
00585   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00586   sacado_blas2.COPY(m, &x[0], 1, &y3[0], 1);
00587 
00588   COMPARE_FAD_VECTORS(y1, y3, m);
00589 }
00590 
00591 // Tests all arguments
00592 template <class FadType, class ScalarType>
00593 void
00594 FadBLASUnitTests<FadType,ScalarType>::
00595 testAXPY1() {
00596   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
00597   for (unsigned int i=0; i<m; i++) {
00598     x[i] = FadType(ndot, urand.number());
00599     ScalarType val = urand.number();
00600     y1[i] = FadType(ndot, val);
00601     y2[i] = FadType(ndot, val);
00602     y3[i] = FadType(ndot, val);
00603     for (unsigned int k=0; k<ndot; k++) {
00604       x[i].fastAccessDx(k) = urand.number();
00605       val = urand.number();
00606       y1[i].fastAccessDx(k) = val;
00607       y2[i].fastAccessDx(k) = val;
00608       y3[i].fastAccessDx(k) = val;
00609     }
00610   }
00611   FadType alpha(ndot, urand.number());
00612   for (unsigned int k=0; k<ndot; k++) 
00613     alpha.fastAccessDx(k) = urand.number();
00614   
00615   Teuchos::BLAS<int,FadType> teuchos_blas;
00616   teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
00617 
00618   Teuchos::BLAS<int,FadType> sacado_blas(false);
00619   sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
00620 
00621   COMPARE_FAD_VECTORS(y1, y2, m);
00622 
00623   unsigned int sz = 2*m*(1+ndot);
00624   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00625   sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
00626 
00627   COMPARE_FAD_VECTORS(y1, y3, m);
00628 }
00629 
00630 // Tests non unit inc
00631 template <class FadType, class ScalarType>
00632 void
00633 FadBLASUnitTests<FadType,ScalarType>::
00634 testAXPY2() {
00635   unsigned int incx = 2;
00636   unsigned int incy = 3;
00637   VectorType x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
00638   for (unsigned int i=0; i<m*incx; i++) {
00639     x[i] = FadType(ndot, urand.number());
00640     for (unsigned int k=0; k<ndot; k++) {
00641       x[i].fastAccessDx(k) = urand.number();
00642     }
00643   }
00644   for (unsigned int i=0; i<m*incy; i++) {
00645     ScalarType val = urand.number();
00646     y1[i] = FadType(ndot, val);
00647     y2[i] = FadType(ndot, val);
00648     y3[i] = FadType(ndot, val);
00649     for (unsigned int k=0; k<ndot; k++) {
00650       val = urand.number();
00651       y1[i].fastAccessDx(k) = val;
00652       y2[i].fastAccessDx(k) = val;
00653       y3[i].fastAccessDx(k) = val;
00654     }
00655   }
00656   FadType alpha(ndot, urand.number());
00657   for (unsigned int k=0; k<ndot; k++) 
00658     alpha.fastAccessDx(k) = urand.number();
00659   
00660   Teuchos::BLAS<int,FadType> teuchos_blas;
00661   teuchos_blas.AXPY(m, alpha, &x[0], incx, &y1[0], incy);
00662 
00663   Teuchos::BLAS<int,FadType> sacado_blas(false);
00664   sacado_blas.AXPY(m, alpha, &x[0], incx, &y2[0], incy);
00665 
00666   COMPARE_FAD_VECTORS(y1, y2, m*incy);
00667 
00668   unsigned int sz = 2*m*(1+ndot);
00669   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00670   sacado_blas2.AXPY(m, alpha, &x[0], incx, &y3[0], incy);
00671 
00672   COMPARE_FAD_VECTORS(y1, y3, m*incy);
00673 }
00674 
00675 // Tests x constant
00676 template <class FadType, class ScalarType>
00677 void
00678 FadBLASUnitTests<FadType,ScalarType>::
00679 testAXPY3() {
00680   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
00681   std::vector<ScalarType> xx(m);
00682   for (unsigned int i=0; i<m; i++) {
00683     xx[i] = urand.number();
00684     x[i] = xx[i];
00685     ScalarType val = urand.number();
00686     y1[i] = FadType(ndot, val);
00687     y2[i] = FadType(ndot, val);
00688     y3[i] = FadType(ndot, val);
00689     y4[i] = FadType(ndot, val);
00690     for (unsigned int k=0; k<ndot; k++) {
00691       val = urand.number();
00692       y1[i].fastAccessDx(k) = val;
00693       y2[i].fastAccessDx(k) = val;
00694       y3[i].fastAccessDx(k) = val;
00695       y4[i].fastAccessDx(k) = val;
00696     }
00697   }
00698   FadType alpha(ndot, urand.number());
00699   for (unsigned int k=0; k<ndot; k++) 
00700     alpha.fastAccessDx(k) = urand.number();
00701   
00702   Teuchos::BLAS<int,FadType> teuchos_blas;
00703   teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
00704 
00705   Teuchos::BLAS<int,FadType> sacado_blas(false);
00706   sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
00707 
00708   COMPARE_FAD_VECTORS(y1, y2, m);
00709 
00710   unsigned int sz = m*(1+ndot)+m;
00711   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00712   sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
00713 
00714   COMPARE_FAD_VECTORS(y1, y3, m);
00715 
00716   sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
00717 
00718   COMPARE_FAD_VECTORS(y1, y4, m);
00719 }
00720 
00721 // Tests y constant
00722 template <class FadType, class ScalarType>
00723 void
00724 FadBLASUnitTests<FadType,ScalarType>::
00725 testAXPY4() {
00726   VectorType x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
00727   for (unsigned int i=0; i<m; i++) {
00728     x[i] = FadType(ndot, urand.number());
00729     ScalarType val = urand.number();
00730     y1[i] = val;
00731     y2[i] = val;
00732     y3[i] = val;
00733     for (unsigned int k=0; k<ndot; k++) {
00734       x[i].fastAccessDx(k) = urand.number();
00735     }
00736   }
00737   FadType alpha(ndot, urand.number());
00738   for (unsigned int k=0; k<ndot; k++) 
00739     alpha.fastAccessDx(k) = urand.number();
00740   
00741   Teuchos::BLAS<int,FadType> teuchos_blas;
00742   teuchos_blas.AXPY(m, alpha, &x[0], 1, &y1[0], 1);
00743 
00744   Teuchos::BLAS<int,FadType> sacado_blas(false);
00745   sacado_blas.AXPY(m, alpha, &x[0], 1, &y2[0], 1);
00746 
00747   COMPARE_FAD_VECTORS(y1, y2, m);
00748 
00749   unsigned int sz = 2*m*(1+ndot);
00750   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00751   sacado_blas2.AXPY(m, alpha, &x[0], 1, &y3[0], 1);
00752 
00753   COMPARE_FAD_VECTORS(y1, y3, m);
00754 }
00755 
00756 // Tests all arguments
00757 template <class FadType, class ScalarType>
00758 void
00759 FadBLASUnitTests<FadType,ScalarType>::
00760 testDOT1() {
00761   VectorType X(m,ndot), Y(m,ndot);
00762   for (unsigned int i=0; i<m; i++) {
00763     X[i] = FadType(ndot, real_urand.number());
00764     Y[i] = FadType(ndot, real_urand.number());
00765     for (unsigned int k=0; k<ndot; k++) {
00766       X[i].fastAccessDx(k) = real_urand.number();
00767       Y[i].fastAccessDx(k) = real_urand.number();
00768     }
00769   }
00770   
00771   Teuchos::BLAS<int,FadType> teuchos_blas;
00772   FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
00773 
00774   Teuchos::BLAS<int,FadType> sacado_blas(false);
00775   FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
00776 
00777   COMPARE_FADS(z1, z2);
00778 
00779   unsigned int sz = 2*m*(1+ndot);
00780   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00781   FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
00782 
00783   COMPARE_FADS(z1, z3);
00784 }
00785 
00786 // Tests non-unit inc
00787 template <class FadType, class ScalarType>
00788 void
00789 FadBLASUnitTests<FadType,ScalarType>::
00790 testDOT2() {
00791   unsigned int incx = 2;
00792   unsigned int incy = 3;
00793   VectorType X(m*incx,ndot), Y(m*incy,ndot);
00794   for (unsigned int i=0; i<m*incx; i++) {
00795     X[i] = FadType(ndot, real_urand.number());
00796     for (unsigned int k=0; k<ndot; k++) {
00797       X[i].fastAccessDx(k) = real_urand.number();
00798     }
00799   }
00800   for (unsigned int i=0; i<m*incy; i++) {
00801     Y[i] = FadType(ndot, real_urand.number());
00802     for (unsigned int k=0; k<ndot; k++) {
00803       Y[i].fastAccessDx(k) = real_urand.number();
00804     }
00805   }
00806   
00807   Teuchos::BLAS<int,FadType> teuchos_blas;
00808   FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
00809 
00810   Teuchos::BLAS<int,FadType> sacado_blas(false);
00811   FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
00812 
00813   COMPARE_FADS(z1, z2);
00814 
00815   unsigned int sz = 2*m*(1+ndot);
00816   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00817   FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
00818 
00819   COMPARE_FADS(z1, z3);
00820 }
00821 
00822 // Tests X constant
00823 template <class FadType, class ScalarType>
00824 void
00825 FadBLASUnitTests<FadType,ScalarType>::
00826 testDOT3() {
00827   VectorType X(m,0), Y(m,ndot);
00828   std::vector<ScalarType> x(m);
00829   for (unsigned int i=0; i<m; i++) {
00830     x[i] = urand.number();
00831     X[i] = x[i];
00832     Y[i] = FadType(ndot, real_urand.number());
00833     for (unsigned int k=0; k<ndot; k++) {
00834       Y[i].fastAccessDx(k) = real_urand.number();
00835     }
00836   }
00837   
00838   Teuchos::BLAS<int,FadType> teuchos_blas;
00839   FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
00840 
00841   Teuchos::BLAS<int,FadType> sacado_blas(false);
00842   FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
00843 
00844   COMPARE_FADS(z1, z2);
00845 
00846   unsigned int sz = 2*m*(1+ndot);
00847   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00848   FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
00849 
00850   COMPARE_FADS(z1, z3);
00851 
00852   FadType z4 = sacado_blas.DOT(m, &x[0], 1, &Y[0], 1);
00853 
00854   COMPARE_FADS(z1, z4);
00855 }
00856 
00857 // Tests Y constant
00858 template <class FadType, class ScalarType>
00859 void
00860 FadBLASUnitTests<FadType,ScalarType>::
00861 testDOT4() {
00862   VectorType X(m,ndot), Y(m,0);
00863   std::vector<ScalarType> y(m);
00864   for (unsigned int i=0; i<m; i++) {
00865     X[i] = FadType(ndot, real_urand.number());
00866     y[i] = urand.number();
00867     Y[i] = y[i];
00868     for (unsigned int k=0; k<ndot; k++) {
00869       X[i].fastAccessDx(k) = real_urand.number();
00870     }
00871   }
00872   
00873   Teuchos::BLAS<int,FadType> teuchos_blas;
00874   FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
00875 
00876   Teuchos::BLAS<int,FadType> sacado_blas(false);
00877   FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
00878 
00879   COMPARE_FADS(z1, z2);
00880 
00881   unsigned int sz = 2*m*(1+ndot);
00882   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00883   FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
00884 
00885   COMPARE_FADS(z1, z3);
00886 
00887   FadType z4 = sacado_blas.DOT(m, &X[0], 1, &y[0], 1);
00888 
00889   COMPARE_FADS(z1, z4);
00890 }
00891 
00892 // Tests all arguments
00893 template <class FadType, class ScalarType>
00894 void
00895 FadBLASUnitTests<FadType,ScalarType>::
00896 testNRM21() {
00897   VectorType X(m,ndot);
00898   for (unsigned int i=0; i<m; i++) {
00899     X[i] = FadType(ndot, real_urand.number());
00900     for (unsigned int k=0; k<ndot; k++) {
00901       X[i].fastAccessDx(k) = real_urand.number();
00902     }
00903   }
00904   
00905   Teuchos::BLAS<int,FadType> teuchos_blas;
00906   typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 = 
00907     teuchos_blas.NRM2(m, &X[0], 1);
00908 
00909   Teuchos::BLAS<int,FadType> sacado_blas(false);
00910   typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 = 
00911     sacado_blas.NRM2(m, &X[0], 1);
00912 
00913   COMPARE_FADS(z1, z2);
00914 
00915   unsigned int sz = m*(1+ndot);
00916   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00917   typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 = 
00918     sacado_blas2.NRM2(m, &X[0], 1);
00919 
00920   COMPARE_FADS(z1, z3);
00921 }
00922 
00923 // Tests non-unit inc
00924 template <class FadType, class ScalarType>
00925 void
00926 FadBLASUnitTests<FadType,ScalarType>::
00927 testNRM22() {
00928   unsigned int incx = 2;
00929   VectorType X(m*incx,ndot);
00930   for (unsigned int i=0; i<m*incx; i++) {
00931     X[i] = FadType(ndot, real_urand.number());
00932     for (unsigned int k=0; k<ndot; k++) {
00933       X[i].fastAccessDx(k) = real_urand.number();
00934     }
00935   }
00936   
00937   Teuchos::BLAS<int,FadType> teuchos_blas;
00938   typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 = 
00939     teuchos_blas.NRM2(m, &X[0], incx);
00940 
00941   Teuchos::BLAS<int,FadType> sacado_blas(false);
00942   typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 = 
00943     sacado_blas.NRM2(m, &X[0], incx);
00944 
00945   COMPARE_FADS(z1, z2);
00946 
00947   unsigned int sz = m*(1+ndot);
00948   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
00949   typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 = 
00950     sacado_blas2.NRM2(m, &X[0], incx);
00951 
00952   COMPARE_FADS(z1, z3);
00953 }
00954 
00955 // Tests all arguments
00956 template <class FadType, class ScalarType>
00957 void
00958 FadBLASUnitTests<FadType,ScalarType>::
00959 testGEMV1() {
00960   VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
00961   for (unsigned int j=0; j<n; j++) {
00962     for (unsigned int i=0; i<m; i++) {
00963       A[i+j*m] = FadType(ndot, urand.number());
00964       for (unsigned int k=0; k<ndot; k++)
00965         A[i+j*m].fastAccessDx(k) = urand.number();
00966     }
00967     B[j] = FadType(ndot, urand.number());
00968     for (unsigned int k=0; k<ndot; k++)
00969       B[j].fastAccessDx(k) = urand.number();
00970   }
00971   FadType alpha(ndot, urand.number());
00972   FadType beta(ndot, urand.number());
00973   for (unsigned int k=0; k<ndot; k++) {
00974     alpha.fastAccessDx(k) = urand.number();
00975     beta.fastAccessDx(k) = urand.number();
00976   }
00977 
00978   for (unsigned int i=0; i<m; i++) {
00979     ScalarType val = urand.number();
00980     C1[i] = FadType(ndot, val);
00981     C2[i] = FadType(ndot, val);
00982     C3[i] = FadType(ndot, val);
00983     for (unsigned int k=0; k<ndot; k++) {
00984       val = urand.number();
00985       C1[i].fastAccessDx(k) = val;
00986       C2[i].fastAccessDx(k) = val;
00987       C3[i].fastAccessDx(k) = val;
00988     } 
00989   }
00990   
00991   Teuchos::BLAS<int,FadType> teuchos_blas;
00992   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
00993         beta, &C1[0], 1);
00994 
00995   Teuchos::BLAS<int,FadType> sacado_blas(false);
00996   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
00997        beta, &C2[0], 1);
00998 
00999   COMPARE_FAD_VECTORS(C1, C2, m);
01000 
01001   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01002   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01003   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01004         beta, &C3[0], 1);
01005 
01006   COMPARE_FAD_VECTORS(C1, C3, m);
01007 }
01008 
01009 // Tests non-unit inc and different lda
01010 template <class FadType, class ScalarType>
01011 void
01012 FadBLASUnitTests<FadType,ScalarType>::
01013 testGEMV2() {
01014   unsigned int lda = m+3;
01015   unsigned int incb = 2;
01016   unsigned int incc = 3;
01017   VectorType A(lda*n,ndot), B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot), 
01018     C3(m*incc,ndot);
01019   for (unsigned int j=0; j<n; j++) {
01020     for (unsigned int i=0; i<lda; i++) {
01021       A[i+j*lda] = FadType(ndot, urand.number());
01022       for (unsigned int k=0; k<ndot; k++)
01023         A[i+j*lda].fastAccessDx(k) = urand.number();
01024     }
01025   }
01026   for (unsigned int j=0; j<n*incb; j++) {
01027     B[j] = FadType(ndot, urand.number());
01028     for (unsigned int k=0; k<ndot; k++)
01029       B[j].fastAccessDx(k) = urand.number();
01030   }
01031   FadType alpha(ndot, urand.number());
01032   FadType beta(ndot, urand.number());
01033   for (unsigned int k=0; k<ndot; k++) {
01034     alpha.fastAccessDx(k) = urand.number();
01035     beta.fastAccessDx(k) = urand.number();
01036   }
01037 
01038   for (unsigned int i=0; i<m*incc; i++) {
01039     ScalarType val = urand.number();
01040     C1[i] = FadType(ndot, val);
01041     C2[i] = FadType(ndot, val);
01042     C3[i] = FadType(ndot, val);
01043     for (unsigned int k=0; k<ndot; k++) {
01044       val = urand.number();
01045       C1[i].fastAccessDx(k) = val;
01046       C2[i].fastAccessDx(k) = val;
01047       C3[i].fastAccessDx(k) = val;
01048     } 
01049   }
01050   
01051   Teuchos::BLAS<int,FadType> teuchos_blas;
01052   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01053         beta, &C1[0], incc);
01054 
01055   Teuchos::BLAS<int,FadType> sacado_blas(false);
01056   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01057        beta, &C2[0], incc);
01058 
01059   COMPARE_FAD_VECTORS(C1, C2, m*incc);
01060 
01061   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01062   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01063   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01064         beta, &C3[0], incc);
01065 
01066   COMPARE_FAD_VECTORS(C1, C3, m*incc);
01067 }
01068 
01069 // Tests transpose with all arguments
01070 template <class FadType, class ScalarType>
01071 void
01072 FadBLASUnitTests<FadType,ScalarType>::
01073 testGEMV3() {
01074   VectorType A(m*n,ndot), B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
01075   for (unsigned int j=0; j<n; j++) {
01076     for (unsigned int i=0; i<m; i++) {
01077       A[i+j*m] = FadType(ndot, urand.number());
01078       for (unsigned int k=0; k<ndot; k++)
01079         A[i+j*m].fastAccessDx(k) = urand.number();
01080     }
01081   }
01082   for (unsigned int j=0; j<m; j++) {
01083     B[j] = FadType(ndot, urand.number());
01084     for (unsigned int k=0; k<ndot; k++)
01085       B[j].fastAccessDx(k) = urand.number();
01086   }
01087   FadType alpha(ndot, urand.number());
01088   FadType beta(ndot, urand.number());
01089   for (unsigned int k=0; k<ndot; k++) {
01090     alpha.fastAccessDx(k) = urand.number();
01091     beta.fastAccessDx(k) = urand.number();
01092   }
01093 
01094   for (unsigned int i=0; i<n; i++) {
01095     ScalarType val = urand.number();
01096     C1[i] = FadType(ndot, val);
01097     C2[i] = FadType(ndot, val);
01098     C3[i] = FadType(ndot, val);
01099     for (unsigned int k=0; k<ndot; k++) {
01100       val = urand.number();
01101       C1[i].fastAccessDx(k) = val;
01102       C2[i].fastAccessDx(k) = val;
01103       C3[i].fastAccessDx(k) = val;
01104     } 
01105   }
01106   
01107   Teuchos::BLAS<int,FadType> teuchos_blas;
01108   teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01109         beta, &C1[0], 1);
01110 
01111   Teuchos::BLAS<int,FadType> sacado_blas(false);
01112   sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01113        beta, &C2[0], 1);
01114 
01115   COMPARE_FAD_VECTORS(C1, C2, n);
01116 
01117   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01118   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01119   sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01120         beta, &C3[0], 1);
01121 
01122   COMPARE_FAD_VECTORS(C1, C3, n);
01123 }
01124 
01125 // Tests transpose with non-unit inc and different lda
01126 template <class FadType, class ScalarType>
01127 void
01128 FadBLASUnitTests<FadType,ScalarType>::
01129 testGEMV4() {
01130   unsigned int lda = m+3;
01131   unsigned int incb = 2;
01132   unsigned int incc = 3;
01133   VectorType A(lda*n,ndot), B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot), 
01134     C3(n*incc,ndot);
01135   for (unsigned int j=0; j<n; j++) {
01136     for (unsigned int i=0; i<lda; i++) {
01137       A[i+j*lda] = FadType(ndot, urand.number());
01138       for (unsigned int k=0; k<ndot; k++)
01139         A[i+j*lda].fastAccessDx(k) = urand.number();
01140     }
01141   }
01142   for (unsigned int j=0; j<m*incb; j++) {
01143     B[j] = FadType(ndot, urand.number());
01144     for (unsigned int k=0; k<ndot; k++)
01145       B[j].fastAccessDx(k) = urand.number();
01146   }
01147   FadType alpha(ndot, urand.number());
01148   FadType beta(ndot, urand.number());
01149   for (unsigned int k=0; k<ndot; k++) {
01150     alpha.fastAccessDx(k) = urand.number();
01151     beta.fastAccessDx(k) = urand.number();
01152   }
01153 
01154   for (unsigned int i=0; i<n*incc; i++) {
01155     ScalarType val = urand.number();
01156     C1[i] = FadType(ndot, val);
01157     C2[i] = FadType(ndot, val);
01158     C3[i] = FadType(ndot, val);
01159     for (unsigned int k=0; k<ndot; k++) {
01160       val = urand.number();
01161       C1[i].fastAccessDx(k) = val;
01162       C2[i].fastAccessDx(k) = val;
01163       C3[i].fastAccessDx(k) = val;
01164     } 
01165   }
01166   
01167   Teuchos::BLAS<int,FadType> teuchos_blas;
01168   teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01169         beta, &C1[0], incc);
01170 
01171   Teuchos::BLAS<int,FadType> sacado_blas(false);
01172   sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01173        beta, &C2[0], incc);
01174 
01175   COMPARE_FAD_VECTORS(C1, C2, n*incc);
01176 
01177   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01178   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01179   sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &A[0], lda, &B[0], incb, 
01180           beta, &C3[0], incc);
01181 
01182   COMPARE_FAD_VECTORS(C1, C3, n*incc);
01183 }
01184 
01185 // Tests with constant C
01186 template <class FadType, class ScalarType>
01187 void
01188 FadBLASUnitTests<FadType,ScalarType>::
01189 testGEMV5() {
01190   VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
01191   for (unsigned int j=0; j<n; j++) {
01192     for (unsigned int i=0; i<m; i++) {
01193       A[i+j*m] = FadType(ndot, urand.number());
01194       for (unsigned int k=0; k<ndot; k++)
01195         A[i+j*m].fastAccessDx(k) = urand.number();
01196     }
01197     B[j] = FadType(ndot, urand.number());
01198     for (unsigned int k=0; k<ndot; k++)
01199       B[j].fastAccessDx(k) = urand.number();
01200   }
01201   FadType alpha(ndot, urand.number());
01202   FadType beta(ndot, urand.number());
01203   for (unsigned int k=0; k<ndot; k++) {
01204     alpha.fastAccessDx(k) = urand.number();
01205     beta.fastAccessDx(k) = urand.number();
01206   }
01207 
01208   for (unsigned int i=0; i<m; i++) {
01209     ScalarType val = urand.number();
01210     C1[i] = val;
01211     C2[i] = val;
01212     C3[i] = val;
01213   }
01214   
01215   Teuchos::BLAS<int,FadType> teuchos_blas;
01216   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01217         beta, &C1[0], 1);
01218 
01219   Teuchos::BLAS<int,FadType> sacado_blas(false);
01220   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01221        beta, &C2[0], 1);
01222 
01223   COMPARE_FAD_VECTORS(C1, C2, m);
01224 
01225   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01226   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01227   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01228         beta, &C3[0], 1);
01229 
01230   COMPARE_FAD_VECTORS(C1, C3, m);
01231 }
01232 
01233 // Tests with constant alpha, beta
01234 template <class FadType, class ScalarType>
01235 void
01236 FadBLASUnitTests<FadType,ScalarType>::
01237 testGEMV6() {
01238   VectorType A(m*n,ndot), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
01239   for (unsigned int j=0; j<n; j++) {
01240     for (unsigned int i=0; i<m; i++) {
01241       A[i+j*m] = FadType(ndot, urand.number());
01242       for (unsigned int k=0; k<ndot; k++)
01243         A[i+j*m].fastAccessDx(k) = urand.number();
01244     }
01245     B[j] = FadType(ndot, urand.number());
01246     for (unsigned int k=0; k<ndot; k++)
01247       B[j].fastAccessDx(k) = urand.number();
01248   }
01249   ScalarType alpha = urand.number();
01250   ScalarType beta = urand.number();
01251 
01252   for (unsigned int i=0; i<m; i++) {
01253     ScalarType val = urand.number();
01254     C1[i] = FadType(ndot, val);
01255     C2[i] = FadType(ndot, val);
01256     C3[i] = FadType(ndot, val);
01257     for (unsigned int k=0; k<ndot; k++) {
01258       val = urand.number();
01259       C1[i].fastAccessDx(k) = val;
01260       C2[i].fastAccessDx(k) = val;
01261       C3[i].fastAccessDx(k) = val;
01262     }
01263   }
01264   
01265   Teuchos::BLAS<int,FadType> teuchos_blas;
01266   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01267         beta, &C1[0], 1);
01268 
01269   Teuchos::BLAS<int,FadType> sacado_blas(false);
01270   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01271        beta, &C2[0], 1);
01272 
01273   COMPARE_FAD_VECTORS(C1, C2, m);
01274 
01275   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01276   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01277   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01278         beta, &C3[0], 1);
01279 
01280   COMPARE_FAD_VECTORS(C1, C3, m);
01281 }
01282 
01283 // Tests wth constant B
01284 template <class FadType, class ScalarType>
01285 void
01286 FadBLASUnitTests<FadType,ScalarType>::
01287 testGEMV7() {
01288   VectorType A(m*n,ndot), B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot), 
01289     C4(m,ndot);
01290   std::vector<ScalarType> b(n);
01291   for (unsigned int j=0; j<n; j++) {
01292     for (unsigned int i=0; i<m; i++) {
01293       A[i+j*m] = FadType(ndot, urand.number());
01294       for (unsigned int k=0; k<ndot; k++)
01295         A[i+j*m].fastAccessDx(k) = urand.number();
01296     }
01297     b[j] = urand.number();
01298     B[j] = b[j];
01299   }
01300   FadType alpha(ndot, urand.number());
01301   FadType beta(ndot, urand.number());
01302   for (unsigned int k=0; k<ndot; k++) {
01303     alpha.fastAccessDx(k) = urand.number();
01304     beta.fastAccessDx(k) = urand.number();
01305   }
01306 
01307   for (unsigned int i=0; i<m; i++) {
01308     ScalarType val = urand.number();
01309     C1[i] = FadType(ndot, val);
01310     C2[i] = FadType(ndot, val);
01311     C3[i] = FadType(ndot, val);
01312     C4[i] = FadType(ndot, val);
01313     for (unsigned int k=0; k<ndot; k++) {
01314       val = urand.number();
01315       C1[i].fastAccessDx(k) = val;
01316       C2[i].fastAccessDx(k) = val;
01317       C3[i].fastAccessDx(k) = val;
01318       C4[i].fastAccessDx(k) = val;
01319     }
01320   }
01321   
01322   Teuchos::BLAS<int,FadType> teuchos_blas;
01323   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01324         beta, &C1[0], 1);
01325   
01326   Teuchos::BLAS<int,FadType> sacado_blas(false);
01327   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01328        beta, &C2[0], 1);
01329   
01330   COMPARE_FAD_VECTORS(C1, C2, m);
01331   
01332   unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
01333   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01334   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01335         beta, &C3[0], 1);
01336   
01337   COMPARE_FAD_VECTORS(C1, C3, m);
01338 
01339   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &b[0], 1, 
01340        beta, &C4[0], 1);
01341   
01342   COMPARE_FAD_VECTORS(C1, C4, m);
01343 }
01344 
01345 // Tests with constant A
01346 template <class FadType, class ScalarType>
01347 void
01348 FadBLASUnitTests<FadType,ScalarType>::
01349 testGEMV8() {
01350   VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
01351     C4(m,ndot);
01352   std::vector<ScalarType> a(m*n);
01353   for (unsigned int j=0; j<n; j++) {
01354     for (unsigned int i=0; i<m; i++) {
01355       a[i+j*m] = urand.number();
01356       A[i+j*m] = a[i+j*m];
01357     }
01358     B[j] = FadType(ndot, urand.number());
01359     for (unsigned int k=0; k<ndot; k++)
01360       B[j].fastAccessDx(k) = urand.number();
01361   }
01362   FadType alpha(ndot, urand.number());
01363   FadType beta(ndot, urand.number());
01364   for (unsigned int k=0; k<ndot; k++) {
01365     alpha.fastAccessDx(k) = urand.number();
01366     beta.fastAccessDx(k) = urand.number();
01367   }
01368   
01369   for (unsigned int i=0; i<m; i++) {
01370     ScalarType val = urand.number();
01371     C1[i] = FadType(ndot, val);
01372     C2[i] = FadType(ndot, val);
01373     C3[i] = FadType(ndot, val);
01374     C4[i] = FadType(ndot, val);
01375     for (unsigned int k=0; k<ndot; k++) {
01376       val = urand.number();
01377       C1[i].fastAccessDx(k) = val;
01378       C2[i].fastAccessDx(k) = val;
01379       C3[i].fastAccessDx(k) = val;
01380       C4[i].fastAccessDx(k) = val;
01381     }
01382   }
01383 
01384   Teuchos::BLAS<int,FadType> teuchos_blas;
01385   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01386         beta, &C1[0], 1);
01387 
01388   Teuchos::BLAS<int,FadType> sacado_blas(false);
01389   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01390        beta, &C2[0], 1);
01391 
01392   COMPARE_FAD_VECTORS(C1, C2, m);
01393 
01394   unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
01395   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01396   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01397         beta, &C3[0], 1);
01398   
01399   COMPARE_FAD_VECTORS(C1, C3, m);
01400 
01401   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &B[0], 1, 
01402        beta, &C4[0], 1);
01403 
01404   COMPARE_FAD_VECTORS(C1, C4, m);
01405 }
01406 
01407 // Tests with constant A, B
01408 template <class FadType, class ScalarType>
01409 void
01410 FadBLASUnitTests<FadType,ScalarType>::
01411 testGEMV9() {
01412   VectorType A(m*n,0), B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
01413     C4(m,ndot);
01414   std::vector<ScalarType> a(m*n), b(n);
01415   for (unsigned int j=0; j<n; j++) {
01416     for (unsigned int i=0; i<m; i++) {
01417       a[i+j*m] = urand.number();
01418       A[i+j*m] = a[i+j*m];
01419     }
01420     b[j] = urand.number();
01421     B[j] = b[j];
01422   }
01423   FadType alpha(ndot, urand.number());
01424   FadType beta(ndot, urand.number());
01425   for (unsigned int k=0; k<ndot; k++) {
01426     alpha.fastAccessDx(k) = urand.number();
01427     beta.fastAccessDx(k) = urand.number();
01428   }
01429 
01430   for (unsigned int i=0; i<m; i++) {
01431     ScalarType val = urand.number();
01432     C1[i] = FadType(ndot, val);
01433     C2[i] = FadType(ndot, val);
01434     C3[i] = FadType(ndot, val);
01435     C4[i] = FadType(ndot, val);
01436     for (unsigned int k=0; k<ndot; k++) {
01437       val = urand.number();
01438       C1[i].fastAccessDx(k) = val;
01439       C2[i].fastAccessDx(k) = val;
01440       C3[i].fastAccessDx(k) = val;
01441       C4[i].fastAccessDx(k) = val;
01442     } 
01443   }
01444   
01445   Teuchos::BLAS<int,FadType> teuchos_blas;
01446   teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01447         beta, &C1[0], 1);
01448 
01449   Teuchos::BLAS<int,FadType> sacado_blas(false);
01450   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01451        beta, &C2[0], 1);
01452 
01453   COMPARE_FAD_VECTORS(C1, C2, m);
01454 
01455   unsigned int sz = m*n* + n*(1+ndot) + m*(1+ndot);
01456   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01457   sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, 
01458         beta, &C3[0], 1);
01459   
01460   COMPARE_FAD_VECTORS(C1, C3, m);
01461 
01462   sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &a[0], m, &b[0], 1, 
01463        beta, &C4[0], 1);
01464 
01465   COMPARE_FAD_VECTORS(C1, C4, m);
01466 }
01467 
01468 // Tests all arguments
01469 template <class FadType, class ScalarType>
01470 void
01471 FadBLASUnitTests<FadType,ScalarType>::
01472 testTRMV1() {
01473   VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
01474   for (unsigned int j=0; j<n; j++) {
01475     for (unsigned int i=0; i<n; i++) {
01476       A[i+j*n] = FadType(ndot, urand.number());
01477       for (unsigned int k=0; k<ndot; k++)
01478         A[i+j*n].fastAccessDx(k) = urand.number();
01479     }
01480     ScalarType val = urand.number();
01481     x1[j] = FadType(ndot, val);
01482     x2[j] = FadType(ndot, val);
01483     x3[j] = FadType(ndot, val);
01484     for (unsigned int k=0; k<ndot; k++) {
01485       val = urand.number();
01486       x1[j].fastAccessDx(k) = val;
01487       x2[j].fastAccessDx(k) = val;
01488       x3[j].fastAccessDx(k) = val;
01489     }
01490   }
01491   
01492   Teuchos::BLAS<int,FadType> teuchos_blas;
01493   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01494         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01495 
01496   Teuchos::BLAS<int,FadType> sacado_blas(false);
01497   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01498         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01499 
01500   COMPARE_FAD_VECTORS(x1, x2, n);
01501 
01502   unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
01503   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01504   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01505         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01506 
01507   COMPARE_FAD_VECTORS(x1, x3, n);
01508 
01509   teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01510         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01511   sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01512         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01513   sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01514         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01515   COMPARE_FAD_VECTORS(x1, x2, n);
01516   COMPARE_FAD_VECTORS(x1, x3, n);
01517 
01518   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01519         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01520   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01521         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01522   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01523         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01524   COMPARE_FAD_VECTORS(x1, x2, n);
01525   COMPARE_FAD_VECTORS(x1, x3, n);
01526 
01527   for (unsigned int i=0; i<n; i++) {
01528     A[i*n+i].val() = 1.0;
01529     for (unsigned int k=0; k<ndot; k++)
01530       A[i*n+i].fastAccessDx(k) = 0.0;
01531   }
01532   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01533         Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01534   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01535         Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01536   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01537         Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01538   COMPARE_FAD_VECTORS(x1, x2, n);
01539   COMPARE_FAD_VECTORS(x1, x3, n);
01540 }
01541 
01542 // Tests non unit inc, different lda
01543 template <class FadType, class ScalarType>
01544 void
01545 FadBLASUnitTests<FadType,ScalarType>::
01546 testTRMV2() {
01547   unsigned int lda = n+3;
01548   unsigned int incx = 2;
01549   VectorType A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
01550   for (unsigned int j=0; j<n; j++) {
01551     for (unsigned int i=0; i<lda; i++) {
01552       A[i+j*lda] = FadType(ndot, urand.number());
01553       for (unsigned int k=0; k<ndot; k++)
01554         A[i+j*lda].fastAccessDx(k) = urand.number();
01555     }
01556   }
01557   for (unsigned int j=0; j<n*incx; j++) {
01558     ScalarType val = urand.number();
01559     x1[j] = FadType(ndot, val);
01560     x2[j] = FadType(ndot, val);
01561     x3[j] = FadType(ndot, val);
01562     for (unsigned int k=0; k<ndot; k++) {
01563       val = urand.number();
01564       x1[j].fastAccessDx(k) = val;
01565       x2[j].fastAccessDx(k) = val;
01566       x3[j].fastAccessDx(k) = val;
01567     }
01568   }
01569   
01570   Teuchos::BLAS<int,FadType> teuchos_blas;
01571   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01572         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
01573 
01574   Teuchos::BLAS<int,FadType> sacado_blas(false);
01575   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01576         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
01577 
01578   COMPARE_FAD_VECTORS(x1, x2, n*incx);
01579 
01580   unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
01581   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01582   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01583         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
01584 
01585   COMPARE_FAD_VECTORS(x1, x3, n*incx);
01586 
01587   teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01588         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
01589   sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01590         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
01591   sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01592         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
01593   COMPARE_FAD_VECTORS(x1, x2, n*incx);
01594   COMPARE_FAD_VECTORS(x1, x3, n*incx);
01595 
01596   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01597         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
01598   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01599         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
01600   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01601         Teuchos::NON_UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
01602   COMPARE_FAD_VECTORS(x1, x2, n*incx);
01603   COMPARE_FAD_VECTORS(x1, x3, n*incx);
01604 
01605   for (unsigned int i=0; i<n; i++) {
01606     A[i*lda+i].val() = 1.0;
01607     for (unsigned int k=0; k<ndot; k++)
01608       A[i*lda+i].fastAccessDx(k) = 0.0;
01609   }
01610   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01611         Teuchos::UNIT_DIAG, n, &A[0], lda, &x1[0], incx);
01612   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01613         Teuchos::UNIT_DIAG, n, &A[0], lda, &x2[0], incx);
01614   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01615         Teuchos::UNIT_DIAG, n, &A[0], lda, &x3[0], incx);
01616   COMPARE_FAD_VECTORS(x1, x2, n*incx);
01617   COMPARE_FAD_VECTORS(x1, x3, n*incx);
01618 }
01619 
01620 // Tests A constant
01621 template <class FadType, class ScalarType>
01622 void
01623 FadBLASUnitTests<FadType,ScalarType>::
01624 testTRMV3() {
01625   VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot), 
01626     x5(n,ndot);
01627   std::vector<ScalarType> a(n*n);
01628   for (unsigned int j=0; j<n; j++) {
01629     for (unsigned int i=0; i<n; i++) {
01630       a[i+j*n] = urand.number();
01631       A[i+j*n] = a[i+j*n];
01632     }
01633     ScalarType val = urand.number();
01634     x1[j] = FadType(ndot, val);
01635     x2[j] = FadType(ndot, val);
01636     x3[j] = FadType(ndot, val);
01637     x4[j] = FadType(ndot, val);
01638     x5[j] = FadType(ndot, val);
01639     for (unsigned int k=0; k<ndot; k++) {
01640       val = urand.number();
01641       x1[j].fastAccessDx(k) = val;
01642       x2[j].fastAccessDx(k) = val;
01643       x3[j].fastAccessDx(k) = val;
01644       x4[j].fastAccessDx(k) = val;
01645       x5[j].fastAccessDx(k) = val;
01646     }
01647   }
01648   
01649   Teuchos::BLAS<int,FadType> teuchos_blas;
01650   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01651         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01652 
01653   Teuchos::BLAS<int,FadType> sacado_blas(false);
01654   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01655         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01656 
01657   COMPARE_FAD_VECTORS(x1, x2, n);
01658 
01659   unsigned int sz = n*n+n*(1+ndot);
01660   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01661   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01662         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01663 
01664   COMPARE_FAD_VECTORS(x1, x3, n);
01665 
01666   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01667        Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
01668 
01669   COMPARE_FAD_VECTORS(x1, x4, n);
01670 
01671   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01672         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x5[0], 1);
01673 
01674   COMPARE_FAD_VECTORS(x1, x5, n);
01675 
01676   teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01677         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01678   sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01679         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01680   sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01681         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01682   sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01683         Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
01684   sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01685         Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
01686   COMPARE_FAD_VECTORS(x1, x2, n);
01687   COMPARE_FAD_VECTORS(x1, x3, n);
01688   COMPARE_FAD_VECTORS(x1, x4, n);
01689   COMPARE_FAD_VECTORS(x1, x5, n);
01690 
01691   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01692         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01693   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01694         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01695   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01696         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01697   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01698         Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x4[0], 1);
01699   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01700         Teuchos::NON_UNIT_DIAG, n, &a[0], n, &x5[0], 1);
01701   COMPARE_FAD_VECTORS(x1, x2, n);
01702   COMPARE_FAD_VECTORS(x1, x3, n);
01703   COMPARE_FAD_VECTORS(x1, x4, n);
01704   COMPARE_FAD_VECTORS(x1, x5, n);
01705 
01706   for (unsigned int i=0; i<n; i++) {
01707     A[i*n+i].val() = 1.0;
01708     for (unsigned int k=0; k<ndot; k++)
01709       A[i*n+i].fastAccessDx(k) = 0.0;
01710   }
01711   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01712         Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01713   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01714         Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01715   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01716         Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01717   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01718         Teuchos::UNIT_DIAG, n, &a[0], n, &x4[0], 1);
01719   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01720         Teuchos::UNIT_DIAG, n, &a[0], n, &x5[0], 1);
01721   COMPARE_FAD_VECTORS(x1, x2, n);
01722   COMPARE_FAD_VECTORS(x1, x3, n);
01723   COMPARE_FAD_VECTORS(x1, x4, n);
01724   COMPARE_FAD_VECTORS(x1, x5, n);
01725 }
01726 
01727 // Tests x constant
01728 template <class FadType, class ScalarType>
01729 void
01730 FadBLASUnitTests<FadType,ScalarType>::
01731 testTRMV4() {
01732   VectorType A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
01733   for (unsigned int j=0; j<n; j++) {
01734     for (unsigned int i=0; i<n; i++) {
01735       A[i+j*n] = FadType(ndot, urand.number());
01736       for (unsigned int k=0; k<ndot; k++)
01737         A[i+j*n].fastAccessDx(k) = urand.number();
01738     }
01739     ScalarType val = urand.number();
01740     x1[j] = val;
01741     x2[j] = val;
01742     x3[j] = val;
01743   }
01744   
01745   Teuchos::BLAS<int,FadType> teuchos_blas;
01746   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01747         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01748 
01749   Teuchos::BLAS<int,FadType> sacado_blas(false);
01750   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01751         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01752 
01753   COMPARE_FAD_VECTORS(x1, x2, n);
01754 
01755   unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
01756   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01757   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01758         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01759 
01760   COMPARE_FAD_VECTORS(x1, x3, n);
01761 
01762   teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01763         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01764   sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01765         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01766   sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
01767         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01768   COMPARE_FAD_VECTORS(x1, x2, n);
01769   COMPARE_FAD_VECTORS(x1, x3, n);
01770 
01771   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01772         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01773   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01774         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01775   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS, 
01776         Teuchos::NON_UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01777   COMPARE_FAD_VECTORS(x1, x2, n);
01778   COMPARE_FAD_VECTORS(x1, x3, n);
01779 
01780   for (unsigned int i=0; i<n; i++) {
01781     A[i*n+i].val() = 1.0;
01782     for (unsigned int k=0; k<ndot; k++)
01783       A[i*n+i].fastAccessDx(k) = 0.0;
01784   }
01785   teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01786         Teuchos::UNIT_DIAG, n, &A[0], n, &x1[0], 1);
01787   sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01788         Teuchos::UNIT_DIAG, n, &A[0], n, &x2[0], 1);
01789   sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
01790         Teuchos::UNIT_DIAG, n, &A[0], n, &x3[0], 1);
01791   COMPARE_FAD_VECTORS(x1, x2, n);
01792   COMPARE_FAD_VECTORS(x1, x3, n);
01793 }
01794 
01795 // Tests all arguments
01796 template <class FadType, class ScalarType>
01797 void
01798 FadBLASUnitTests<FadType,ScalarType>::
01799 testGER1() {
01800 
01801   // GER is apparently not defined in the BLAS for complex types
01802   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
01803     return; 
01804 
01805   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
01806   for (unsigned int j=0; j<n; j++) {
01807     for (unsigned int i=0; i<m; i++) {
01808       ScalarType val = urand.number();
01809       A1[i+j*m] = FadType(ndot, val);
01810       A2[i+j*m] = FadType(ndot, val);
01811       A3[i+j*m] = FadType(ndot, val);
01812       for (unsigned int k=0; k<ndot; k++) {
01813         val = urand.number();
01814         A1[i+j*m].fastAccessDx(k) = val;
01815         A2[i+j*m].fastAccessDx(k) = val;
01816         A3[i+j*m].fastAccessDx(k) = val;
01817       }
01818     }
01819   }
01820   for (unsigned int i=0; i<m; i++) {
01821     x[i] = FadType(ndot, urand.number());
01822     for (unsigned int k=0; k<ndot; k++)
01823       x[i].fastAccessDx(k) = urand.number();
01824   }
01825   for (unsigned int i=0; i<n; i++) {
01826     y[i] = FadType(ndot, urand.number());
01827     for (unsigned int k=0; k<ndot; k++)
01828       y[i].fastAccessDx(k) = urand.number();
01829   }
01830   FadType alpha(ndot, urand.number());
01831   for (unsigned int k=0; k<ndot; k++) {
01832     alpha.fastAccessDx(k) = urand.number();
01833   }
01834   
01835   Teuchos::BLAS<int,FadType> teuchos_blas;
01836   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
01837 
01838   Teuchos::BLAS<int,FadType> sacado_blas(false);
01839   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
01840 
01841   COMPARE_FAD_VECTORS(A1, A2, m*n);
01842 
01843   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01844   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01845   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
01846 
01847   COMPARE_FAD_VECTORS(A1, A3, m*n);
01848 }
01849 
01850 // Tests non unit inc, different lda
01851 template <class FadType, class ScalarType>
01852 void
01853 FadBLASUnitTests<FadType,ScalarType>::
01854 testGER2() {
01855 
01856   // GER is apparently not defined in the BLAS for complex types
01857   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
01858     return; 
01859 
01860   unsigned int lda = m+3;
01861   unsigned int incx = 2;
01862   unsigned int incy = 3;
01863   VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot), x(m*incx,ndot), 
01864     y(n*incy,ndot);
01865   for (unsigned int j=0; j<n; j++) {
01866     for (unsigned int i=0; i<lda; i++) {
01867       ScalarType val = urand.number();
01868       A1[i+j*lda] = FadType(ndot, val);
01869       A2[i+j*lda] = FadType(ndot, val);
01870       A3[i+j*lda] = FadType(ndot, val);
01871       for (unsigned int k=0; k<ndot; k++) {
01872         val = urand.number();
01873         A1[i+j*lda].fastAccessDx(k) = val;
01874         A2[i+j*lda].fastAccessDx(k) = val;
01875         A3[i+j*lda].fastAccessDx(k) = val;
01876       }
01877     }
01878   }
01879   for (unsigned int i=0; i<m*incx; i++) {
01880     x[i] = FadType(ndot, urand.number());
01881     for (unsigned int k=0; k<ndot; k++)
01882       x[i].fastAccessDx(k) = urand.number();
01883   }
01884   for (unsigned int i=0; i<n*incy; i++) {
01885     y[i] = FadType(ndot, urand.number());
01886     for (unsigned int k=0; k<ndot; k++)
01887       y[i].fastAccessDx(k) = urand.number();
01888   }
01889   FadType alpha(ndot, urand.number());
01890   for (unsigned int k=0; k<ndot; k++) {
01891     alpha.fastAccessDx(k) = urand.number();
01892   }
01893   
01894   Teuchos::BLAS<int,FadType> teuchos_blas;
01895   teuchos_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A1[0], lda);
01896 
01897   Teuchos::BLAS<int,FadType> sacado_blas(false);
01898   sacado_blas.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A2[0], lda);
01899 
01900   COMPARE_FAD_VECTORS(A1, A2, lda*n);
01901 
01902   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01903   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01904   sacado_blas2.GER(m, n, alpha, &x[0], incx, &y[0], incy, &A3[0], lda);
01905 
01906   COMPARE_FAD_VECTORS(A1, A3, lda*n);
01907 }
01908 
01909 // Tests constant alpha
01910 template <class FadType, class ScalarType>
01911 void
01912 FadBLASUnitTests<FadType,ScalarType>::
01913 testGER3() {
01914 
01915   // GER is apparently not defined in the BLAS for complex types
01916   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
01917     return; 
01918 
01919   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
01920   for (unsigned int j=0; j<n; j++) {
01921     for (unsigned int i=0; i<m; i++) {
01922       ScalarType val = urand.number();
01923       A1[i+j*m] = FadType(ndot, val);
01924       A2[i+j*m] = FadType(ndot, val);
01925       A3[i+j*m] = FadType(ndot, val);
01926       for (unsigned int k=0; k<ndot; k++) {
01927         val = urand.number();
01928         A1[i+j*m].fastAccessDx(k) = val;
01929         A2[i+j*m].fastAccessDx(k) = val;
01930         A3[i+j*m].fastAccessDx(k) = val;
01931       }
01932     }
01933   }
01934   for (unsigned int i=0; i<m; i++) {
01935     x[i] = FadType(ndot, urand.number());
01936     for (unsigned int k=0; k<ndot; k++)
01937       x[i].fastAccessDx(k) = urand.number();
01938   }
01939   for (unsigned int i=0; i<n; i++) {
01940     y[i] = FadType(ndot, urand.number());
01941     for (unsigned int k=0; k<ndot; k++)
01942       y[i].fastAccessDx(k) = urand.number();
01943   }
01944   ScalarType alpha = urand.number();
01945   
01946   Teuchos::BLAS<int,FadType> teuchos_blas;
01947   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
01948 
01949   Teuchos::BLAS<int,FadType> sacado_blas(false);
01950   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
01951 
01952   COMPARE_FAD_VECTORS(A1, A2, m*n);
01953 
01954   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
01955   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
01956   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
01957 
01958   COMPARE_FAD_VECTORS(A1, A3, m*n);
01959 }
01960 
01961 // Tests constant x
01962 template <class FadType, class ScalarType>
01963 void
01964 FadBLASUnitTests<FadType,ScalarType>::
01965 testGER4() {
01966 
01967   // GER is apparently not defined in the BLAS for complex types
01968   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
01969     return; 
01970 
01971   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot), 
01972     A5(m*n,ndot), x(m,ndot), y(n,ndot);
01973   std::vector<ScalarType> xx(m);
01974   for (unsigned int j=0; j<n; j++) {
01975     for (unsigned int i=0; i<m; i++) {
01976       ScalarType val = urand.number();
01977       A1[i+j*m] = FadType(ndot, val);
01978       A2[i+j*m] = FadType(ndot, val);
01979       A3[i+j*m] = FadType(ndot, val);
01980       A4[i+j*m] = FadType(ndot, val);
01981       A5[i+j*m] = FadType(ndot, val);
01982       for (unsigned int k=0; k<ndot; k++) {
01983         val = urand.number();
01984         A1[i+j*m].fastAccessDx(k) = val;
01985         A2[i+j*m].fastAccessDx(k) = val;
01986         A3[i+j*m].fastAccessDx(k) = val;
01987   A4[i+j*m].fastAccessDx(k) = val;
01988   A5[i+j*m].fastAccessDx(k) = val;
01989       }
01990     }
01991   }
01992   for (unsigned int i=0; i<m; i++) {
01993     xx[i] = urand.number();
01994     x[i] = xx[i];
01995   }
01996   for (unsigned int i=0; i<n; i++) {
01997     y[i] = FadType(ndot, urand.number());
01998     for (unsigned int k=0; k<ndot; k++)
01999       y[i].fastAccessDx(k) = urand.number();
02000   }
02001   FadType alpha(ndot, urand.number());
02002   for (unsigned int k=0; k<ndot; k++) {
02003     alpha.fastAccessDx(k) = urand.number();
02004   }
02005   
02006   Teuchos::BLAS<int,FadType> teuchos_blas;
02007   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
02008 
02009   Teuchos::BLAS<int,FadType> sacado_blas(false);
02010   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
02011 
02012   COMPARE_FAD_VECTORS(A1, A2, m*n);
02013 
02014   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
02015   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02016   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
02017 
02018   COMPARE_FAD_VECTORS(A1, A3, m*n);
02019 
02020   sacado_blas.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A4[0], m);
02021 
02022   COMPARE_FAD_VECTORS(A1, A4, m*n);
02023 
02024   sacado_blas2.GER(m, n, alpha, &xx[0], 1, &y[0], 1, &A5[0], m);
02025 
02026   COMPARE_FAD_VECTORS(A1, A5, m*n);
02027 }
02028 
02029 // Tests constant y
02030 template <class FadType, class ScalarType>
02031 void
02032 FadBLASUnitTests<FadType,ScalarType>::
02033 testGER5() {
02034 
02035   // GER is apparently not defined in the BLAS for complex types
02036   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
02037     return; 
02038 
02039   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot), 
02040     A5(m*n,ndot), x(m,ndot), y(n,ndot);
02041   std::vector<ScalarType> yy(n);
02042   for (unsigned int j=0; j<n; j++) {
02043     for (unsigned int i=0; i<m; i++) {
02044       ScalarType val = urand.number();
02045       A1[i+j*m] = FadType(ndot, val);
02046       A2[i+j*m] = FadType(ndot, val);
02047       A3[i+j*m] = FadType(ndot, val);
02048       A4[i+j*m] = FadType(ndot, val);
02049       A5[i+j*m] = FadType(ndot, val);
02050       for (unsigned int k=0; k<ndot; k++) {
02051         val = urand.number();
02052         A1[i+j*m].fastAccessDx(k) = val;
02053         A2[i+j*m].fastAccessDx(k) = val;
02054         A3[i+j*m].fastAccessDx(k) = val;
02055   A4[i+j*m].fastAccessDx(k) = val;
02056   A5[i+j*m].fastAccessDx(k) = val;
02057       }
02058     }
02059   }
02060   for (unsigned int i=0; i<m; i++) {
02061     x[i] = FadType(ndot, urand.number());
02062     for (unsigned int k=0; k<ndot; k++)
02063       x[i].fastAccessDx(k) = urand.number();
02064   }
02065   for (unsigned int i=0; i<n; i++) {
02066     yy[i] = urand.number();
02067     y[i] = yy[i];
02068   }
02069   FadType alpha(ndot, urand.number());
02070   for (unsigned int k=0; k<ndot; k++) {
02071     alpha.fastAccessDx(k) = urand.number();
02072   }
02073   
02074   Teuchos::BLAS<int,FadType> teuchos_blas;
02075   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
02076 
02077   Teuchos::BLAS<int,FadType> sacado_blas(false);
02078   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
02079 
02080   COMPARE_FAD_VECTORS(A1, A2, m*n);
02081 
02082   unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
02083   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02084   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
02085 
02086   COMPARE_FAD_VECTORS(A1, A3, m*n);
02087 
02088   sacado_blas.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A4[0], m);
02089 
02090   COMPARE_FAD_VECTORS(A1, A4, m*n);
02091 
02092   sacado_blas2.GER(m, n, alpha, &x[0], 1, &yy[0], 1, &A5[0], m);
02093 
02094   COMPARE_FAD_VECTORS(A1, A5, m*n);
02095 }
02096 
02097 // Tests constant x and y
02098 template <class FadType, class ScalarType>
02099 void
02100 FadBLASUnitTests<FadType,ScalarType>::
02101 testGER6() {
02102 
02103   // GER is apparently not defined in the BLAS for complex types
02104   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
02105     return; 
02106 
02107   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot), 
02108     A5(m*n,ndot), x(m,ndot), y(n,ndot);
02109   std::vector<ScalarType> xx(n), yy(n);
02110   for (unsigned int j=0; j<n; j++) {
02111     for (unsigned int i=0; i<m; i++) {
02112       ScalarType val = urand.number();
02113       A1[i+j*m] = FadType(ndot, val);
02114       A2[i+j*m] = FadType(ndot, val);
02115       A3[i+j*m] = FadType(ndot, val);
02116       A4[i+j*m] = FadType(ndot, val);
02117       A5[i+j*m] = FadType(ndot, val);
02118       for (unsigned int k=0; k<ndot; k++) {
02119         val = urand.number();
02120         A1[i+j*m].fastAccessDx(k) = val;
02121         A2[i+j*m].fastAccessDx(k) = val;
02122         A3[i+j*m].fastAccessDx(k) = val;
02123   A4[i+j*m].fastAccessDx(k) = val;
02124   A5[i+j*m].fastAccessDx(k) = val;
02125       }
02126     }
02127   }
02128   for (unsigned int i=0; i<m; i++) {
02129     xx[i] = urand.number();
02130     x[i] = xx[i];
02131   }
02132   for (unsigned int i=0; i<n; i++) {
02133     yy[i] = urand.number();
02134     y[i] = yy[i];
02135   }
02136   FadType alpha(ndot, urand.number());
02137   for (unsigned int k=0; k<ndot; k++) {
02138     alpha.fastAccessDx(k) = urand.number();
02139   }
02140   
02141   Teuchos::BLAS<int,FadType> teuchos_blas;
02142   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
02143 
02144   Teuchos::BLAS<int,FadType> sacado_blas(false);
02145   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
02146 
02147   COMPARE_FAD_VECTORS(A1, A2, m*n);
02148 
02149   unsigned int sz = m*n*(1+ndot) + m + n;
02150   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02151   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
02152 
02153   COMPARE_FAD_VECTORS(A1, A3, m*n);
02154 
02155   sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
02156 
02157   COMPARE_FAD_VECTORS(A1, A4, m*n);
02158 
02159   sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
02160 
02161   COMPARE_FAD_VECTORS(A1, A5, m*n);
02162 }
02163 
02164 // Tests constant A
02165 template <class FadType, class ScalarType>
02166 void
02167 FadBLASUnitTests<FadType,ScalarType>::
02168 testGER7() {
02169 
02170   // GER is apparently not defined in the BLAS for complex types
02171   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
02172     return; 
02173 
02174   VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), x(m,ndot), y(n,ndot);
02175   for (unsigned int j=0; j<n; j++) {
02176     for (unsigned int i=0; i<m; i++) {
02177       ScalarType val = urand.number();
02178       A1[i+j*m] = val;
02179       A2[i+j*m] = val;
02180       A3[i+j*m] = val;
02181     }
02182   }
02183   for (unsigned int i=0; i<m; i++) {
02184     x[i] = FadType(ndot, urand.number());
02185     for (unsigned int k=0; k<ndot; k++)
02186       x[i].fastAccessDx(k) = urand.number();
02187   }
02188   for (unsigned int i=0; i<n; i++) {
02189     y[i] = FadType(ndot, urand.number());
02190     for (unsigned int k=0; k<ndot; k++)
02191       y[i].fastAccessDx(k) = urand.number();
02192   }
02193   FadType alpha(ndot, urand.number());
02194   for (unsigned int k=0; k<ndot; k++) {
02195     alpha.fastAccessDx(k) = urand.number();
02196   }
02197   
02198   Teuchos::BLAS<int,FadType> teuchos_blas;
02199   teuchos_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A1[0], m);
02200 
02201   Teuchos::BLAS<int,FadType> sacado_blas(false);
02202   sacado_blas.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A2[0], m);
02203 
02204   COMPARE_FAD_VECTORS(A1, A2, m*n);
02205 
02206   unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
02207   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02208   sacado_blas2.GER(m, n, alpha, &x[0], 1, &y[0], 1, &A3[0], m);
02209 
02210   COMPARE_FAD_VECTORS(A1, A3, m*n);
02211 }
02212 
02213 // Tests all arguments
02214 template <class FadType, class ScalarType>
02215 void
02216 FadBLASUnitTests<FadType,ScalarType>::
02217 testGEMM1() {
02218   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
02219   for (unsigned int j=0; j<l; j++) {
02220     for (unsigned int i=0; i<m; i++) {
02221       A[i+j*m] = FadType(ndot, urand.number());
02222       for (unsigned int k=0; k<ndot; k++)
02223         A[i+j*m].fastAccessDx(k) = urand.number();
02224     }
02225   }
02226   for (unsigned int j=0; j<n; j++) {
02227     for (unsigned int i=0; i<l; i++) {
02228       B[i+j*l] = FadType(ndot, urand.number());
02229       for (unsigned int k=0; k<ndot; k++)
02230   B[i+j*l].fastAccessDx(k) = urand.number();
02231     }
02232   }
02233   FadType alpha(ndot, urand.number());
02234   FadType beta(ndot, urand.number());
02235   for (unsigned int k=0; k<ndot; k++) {
02236     alpha.fastAccessDx(k) = urand.number();
02237     beta.fastAccessDx(k) = urand.number();
02238   }
02239 
02240   for (unsigned int j=0; j<n; j++) {
02241     for (unsigned int i=0; i<m; i++) {
02242       ScalarType val = urand.number();
02243       C1[i+j*m] = FadType(ndot, val);
02244       C2[i+j*m] = FadType(ndot, val);
02245       C3[i+j*m] = FadType(ndot, val);
02246       for (unsigned int k=0; k<ndot; k++) {
02247   val = urand.number();
02248   C1[i+j*m].fastAccessDx(k) = val;
02249   C2[i+j*m].fastAccessDx(k) = val;
02250   C3[i+j*m].fastAccessDx(k) = val;
02251       } 
02252     }
02253   }
02254   
02255   Teuchos::BLAS<int,FadType> teuchos_blas;
02256   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02257         &A[0], m, &B[0], l, beta, &C1[0], m);
02258 
02259   Teuchos::BLAS<int,FadType> sacado_blas(false);
02260   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02261         &A[0], m, &B[0], l, beta, &C2[0], m);
02262 
02263   COMPARE_FAD_VECTORS(C1, C2, m*n);
02264 
02265   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02266   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02267   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02268         &A[0], m, &B[0], l, beta, &C3[0], m);
02269 
02270   COMPARE_FAD_VECTORS(C1, C3, m*n);
02271 
02272   // transa
02273   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02274         &A[0], l, &B[0], l, beta, &C1[0], m);
02275   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02276         &A[0], l, &B[0], l, beta, &C2[0], m);
02277   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02278         &A[0], l, &B[0], l, beta, &C3[0], m);
02279 
02280   COMPARE_FAD_VECTORS(C1, C2, m*n);
02281   COMPARE_FAD_VECTORS(C1, C3, m*n);
02282 
02283   // transb
02284   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02285         &A[0], m, &B[0], n, beta, &C1[0], m);
02286   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02287         &A[0], m, &B[0], n, beta, &C2[0], m);
02288   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02289         &A[0], m, &B[0], n, beta, &C3[0], m);
02290 
02291   COMPARE_FAD_VECTORS(C1, C2, m*n);
02292   COMPARE_FAD_VECTORS(C1, C3, m*n);
02293 
02294   // transa and transb
02295   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02296         &A[0], l, &B[0], n, beta, &C1[0], m);
02297   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02298         &A[0], l, &B[0], n, beta, &C2[0], m);
02299   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02300         &A[0], l, &B[0], n, beta, &C3[0], m);
02301 
02302   COMPARE_FAD_VECTORS(C1, C2, m*n);
02303   COMPARE_FAD_VECTORS(C1, C3, m*n);
02304 }
02305 
02306 // Tests different lda, ldb, ldc
02307 template <class FadType, class ScalarType>
02308 void
02309 FadBLASUnitTests<FadType,ScalarType>::
02310 testGEMM2() {
02311   unsigned int lda = m+4;
02312   unsigned int ldb = l+4;
02313   unsigned int ldc = m+5;
02314   VectorType A(lda*l,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
02315     C3(ldc*n,ndot);
02316   for (unsigned int j=0; j<l; j++) {
02317     for (unsigned int i=0; i<lda; i++) {
02318       A[i+j*lda] = FadType(ndot, urand.number());
02319       for (unsigned int k=0; k<ndot; k++)
02320         A[i+j*lda].fastAccessDx(k) = urand.number();
02321     }
02322   }
02323   for (unsigned int j=0; j<n; j++) {
02324     for (unsigned int i=0; i<ldb; i++) {
02325       B[i+j*ldb] = FadType(ndot, urand.number());
02326       for (unsigned int k=0; k<ndot; k++)
02327   B[i+j*ldb].fastAccessDx(k) = urand.number();
02328     }
02329   }
02330   FadType alpha(ndot, urand.number());
02331   FadType beta(ndot, urand.number());
02332   for (unsigned int k=0; k<ndot; k++) {
02333     alpha.fastAccessDx(k) = urand.number();
02334     beta.fastAccessDx(k) = urand.number();
02335   }
02336 
02337   for (unsigned int j=0; j<n; j++) {
02338     for (unsigned int i=0; i<ldc; i++) {
02339       ScalarType val = urand.number();
02340       C1[i+j*ldc] = FadType(ndot, val);
02341       C2[i+j*ldc] = FadType(ndot, val);
02342       C3[i+j*ldc] = FadType(ndot, val);
02343       for (unsigned int k=0; k<ndot; k++) {
02344   val = urand.number();
02345   C1[i+j*ldc].fastAccessDx(k) = val;
02346   C2[i+j*ldc].fastAccessDx(k) = val;
02347   C3[i+j*ldc].fastAccessDx(k) = val;
02348       } 
02349     }
02350   }
02351   
02352   Teuchos::BLAS<int,FadType> teuchos_blas;
02353   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02354         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
02355 
02356   Teuchos::BLAS<int,FadType> sacado_blas(false);
02357   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02358         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
02359 
02360   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
02361 
02362   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02363   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02364   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02365         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
02366 
02367   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
02368 }
02369 
02370 // Tests different lda, ldb, ldc with transa
02371 template <class FadType, class ScalarType>
02372 void
02373 FadBLASUnitTests<FadType,ScalarType>::
02374 testGEMM3() {
02375   unsigned int lda = l+3;
02376   unsigned int ldb = l+4;
02377   unsigned int ldc = m+5;
02378   VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
02379     C3(ldc*n,ndot);
02380   for (unsigned int j=0; j<m; j++) {
02381     for (unsigned int i=0; i<lda; i++) {
02382       A[i+j*lda] = FadType(ndot, urand.number());
02383       for (unsigned int k=0; k<ndot; k++)
02384         A[i+j*lda].fastAccessDx(k) = urand.number();
02385     }
02386   }
02387   for (unsigned int j=0; j<n; j++) {
02388     for (unsigned int i=0; i<ldb; i++) {
02389       B[i+j*ldb] = FadType(ndot, urand.number());
02390       for (unsigned int k=0; k<ndot; k++)
02391   B[i+j*ldb].fastAccessDx(k) = urand.number();
02392     }
02393   }
02394   FadType alpha(ndot, urand.number());
02395   FadType beta(ndot, urand.number());
02396   for (unsigned int k=0; k<ndot; k++) {
02397     alpha.fastAccessDx(k) = urand.number();
02398     beta.fastAccessDx(k) = urand.number();
02399   }
02400 
02401   for (unsigned int j=0; j<n; j++) {
02402     for (unsigned int i=0; i<ldc; i++) {
02403       ScalarType val = urand.number();
02404       C1[i+j*ldc] = FadType(ndot, val);
02405       C2[i+j*ldc] = FadType(ndot, val);
02406       C3[i+j*ldc] = FadType(ndot, val);
02407       for (unsigned int k=0; k<ndot; k++) {
02408   val = urand.number();
02409   C1[i+j*ldc].fastAccessDx(k) = val;
02410   C2[i+j*ldc].fastAccessDx(k) = val;
02411   C3[i+j*ldc].fastAccessDx(k) = val;
02412       } 
02413     }
02414   }
02415   
02416   Teuchos::BLAS<int,FadType> teuchos_blas;
02417   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02418         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
02419 
02420   Teuchos::BLAS<int,FadType> sacado_blas(false);
02421   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02422         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
02423 
02424   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
02425 
02426   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02427   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02428   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02429         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
02430 
02431   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
02432 }
02433 
02434 // Tests different lda, ldb, ldc with transb
02435 template <class FadType, class ScalarType>
02436 void
02437 FadBLASUnitTests<FadType,ScalarType>::
02438 testGEMM4() {
02439   unsigned int lda = m+4;
02440   unsigned int ldb = n+4;
02441   unsigned int ldc = m+5;
02442   VectorType A(lda*l,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
02443     C3(ldc*n,ndot);
02444   for (unsigned int j=0; j<l; j++) {
02445     for (unsigned int i=0; i<lda; i++) {
02446       A[i+j*lda] = FadType(ndot, urand.number());
02447       for (unsigned int k=0; k<ndot; k++)
02448         A[i+j*lda].fastAccessDx(k) = urand.number();
02449     }
02450   }
02451   for (unsigned int j=0; j<l; j++) {
02452     for (unsigned int i=0; i<ldb; i++) {
02453       B[i+j*ldb] = FadType(ndot, urand.number());
02454       for (unsigned int k=0; k<ndot; k++)
02455   B[i+j*ldb].fastAccessDx(k) = urand.number();
02456     }
02457   }
02458   FadType alpha(ndot, urand.number());
02459   FadType beta(ndot, urand.number());
02460   for (unsigned int k=0; k<ndot; k++) {
02461     alpha.fastAccessDx(k) = urand.number();
02462     beta.fastAccessDx(k) = urand.number();
02463   }
02464 
02465   for (unsigned int j=0; j<n; j++) {
02466     for (unsigned int i=0; i<ldc; i++) {
02467       ScalarType val = urand.number();
02468       C1[i+j*ldc] = FadType(ndot, val);
02469       C2[i+j*ldc] = FadType(ndot, val);
02470       C3[i+j*ldc] = FadType(ndot, val);
02471       for (unsigned int k=0; k<ndot; k++) {
02472   val = urand.number();
02473   C1[i+j*ldc].fastAccessDx(k) = val;
02474   C2[i+j*ldc].fastAccessDx(k) = val;
02475   C3[i+j*ldc].fastAccessDx(k) = val;
02476       } 
02477     }
02478   }
02479   
02480   Teuchos::BLAS<int,FadType> teuchos_blas;
02481   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02482         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
02483 
02484   Teuchos::BLAS<int,FadType> sacado_blas(false);
02485   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02486         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
02487 
02488   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
02489 
02490   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02491   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02492   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02493         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
02494 
02495   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
02496 }
02497 
02498 // Tests different lda, ldb, ldc with transa and transb
02499 template <class FadType, class ScalarType>
02500 void
02501 FadBLASUnitTests<FadType,ScalarType>::
02502 testGEMM5() {
02503   unsigned int lda = l+3;
02504   unsigned int ldb = n+4;
02505   unsigned int ldc = m+5;
02506   VectorType A(lda*m,ndot), B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
02507     C3(ldc*n,ndot);
02508   for (unsigned int j=0; j<m; j++) {
02509     for (unsigned int i=0; i<lda; i++) {
02510       A[i+j*lda] = FadType(ndot, urand.number());
02511       for (unsigned int k=0; k<ndot; k++)
02512         A[i+j*lda].fastAccessDx(k) = urand.number();
02513     }
02514   }
02515   for (unsigned int j=0; j<l; j++) {
02516     for (unsigned int i=0; i<ldb; i++) {
02517       B[i+j*ldb] = FadType(ndot, urand.number());
02518       for (unsigned int k=0; k<ndot; k++)
02519   B[i+j*ldb].fastAccessDx(k) = urand.number();
02520     }
02521   }
02522   FadType alpha(ndot, urand.number());
02523   FadType beta(ndot, urand.number());
02524   for (unsigned int k=0; k<ndot; k++) {
02525     alpha.fastAccessDx(k) = urand.number();
02526     beta.fastAccessDx(k) = urand.number();
02527   }
02528 
02529   for (unsigned int j=0; j<n; j++) {
02530     for (unsigned int i=0; i<ldc; i++) {
02531       ScalarType val = urand.number();
02532       C1[i+j*ldc] = FadType(ndot, val);
02533       C2[i+j*ldc] = FadType(ndot, val);
02534       C3[i+j*ldc] = FadType(ndot, val);
02535       for (unsigned int k=0; k<ndot; k++) {
02536   val = urand.number();
02537   C1[i+j*ldc].fastAccessDx(k) = val;
02538   C2[i+j*ldc].fastAccessDx(k) = val;
02539   C3[i+j*ldc].fastAccessDx(k) = val;
02540       } 
02541     }
02542   }
02543   
02544   Teuchos::BLAS<int,FadType> teuchos_blas;
02545   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02546         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
02547 
02548   Teuchos::BLAS<int,FadType> sacado_blas(false);
02549   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02550         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
02551 
02552   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
02553 
02554   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02555   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02556   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02557         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
02558 
02559   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
02560 }
02561 
02562 // Tests with constant C
02563 template <class FadType, class ScalarType>
02564 void
02565 FadBLASUnitTests<FadType,ScalarType>::
02566 testGEMM6() {
02567   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
02568   for (unsigned int j=0; j<l; j++) {
02569     for (unsigned int i=0; i<m; i++) {
02570       A[i+j*m] = FadType(ndot, urand.number());
02571       for (unsigned int k=0; k<ndot; k++)
02572         A[i+j*m].fastAccessDx(k) = urand.number();
02573     }
02574   }
02575   for (unsigned int j=0; j<n; j++) {
02576     for (unsigned int i=0; i<l; i++) {
02577       B[i+j*l] = FadType(ndot, urand.number());
02578       for (unsigned int k=0; k<ndot; k++)
02579   B[i+j*l].fastAccessDx(k) = urand.number();
02580     }
02581   }
02582   FadType alpha(ndot, urand.number());
02583   FadType beta(ndot, urand.number());
02584   for (unsigned int k=0; k<ndot; k++) {
02585     alpha.fastAccessDx(k) = urand.number();
02586     beta.fastAccessDx(k) = urand.number();
02587   }
02588 
02589   for (unsigned int j=0; j<n; j++) {
02590     for (unsigned int i=0; i<m; i++) {
02591       ScalarType val = urand.number();
02592       C1[i+j*m] = val;
02593       C2[i+j*m] = val;
02594       C3[i+j*m] = val;
02595     }
02596   }
02597   
02598   Teuchos::BLAS<int,FadType> teuchos_blas;
02599   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02600         &A[0], m, &B[0], l, beta, &C1[0], m);
02601 
02602   Teuchos::BLAS<int,FadType> sacado_blas(false);
02603   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02604         &A[0], m, &B[0], l, beta, &C2[0], m);
02605 
02606   COMPARE_FAD_VECTORS(C1, C2, m*n);
02607 
02608   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02609   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02610   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02611         &A[0], m, &B[0], l, beta, &C3[0], m);
02612 
02613   COMPARE_FAD_VECTORS(C1, C3, m*n);
02614 
02615   // transa
02616   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02617         &A[0], l, &B[0], l, beta, &C1[0], m);
02618   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02619         &A[0], l, &B[0], l, beta, &C2[0], m);
02620   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02621         &A[0], l, &B[0], l, beta, &C3[0], m);
02622 
02623   COMPARE_FAD_VECTORS(C1, C2, m*n);
02624   COMPARE_FAD_VECTORS(C1, C3, m*n);
02625 
02626   // transb
02627   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02628         &A[0], m, &B[0], n, beta, &C1[0], m);
02629   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02630         &A[0], m, &B[0], n, beta, &C2[0], m);
02631   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02632         &A[0], m, &B[0], n, beta, &C3[0], m);
02633 
02634   COMPARE_FAD_VECTORS(C1, C2, m*n);
02635   COMPARE_FAD_VECTORS(C1, C3, m*n);
02636 
02637   // transa and transb
02638   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02639         &A[0], l, &B[0], n, beta, &C1[0], m);
02640   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02641         &A[0], l, &B[0], n, beta, &C2[0], m);
02642   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02643         &A[0], l, &B[0], n, beta, &C3[0], m);
02644 
02645   COMPARE_FAD_VECTORS(C1, C2, m*n);
02646   COMPARE_FAD_VECTORS(C1, C3, m*n);
02647 }
02648 
02649 // Tests with constant alpha, beta
02650 template <class FadType, class ScalarType>
02651 void
02652 FadBLASUnitTests<FadType,ScalarType>::
02653 testGEMM7() {
02654   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
02655   for (unsigned int j=0; j<l; j++) {
02656     for (unsigned int i=0; i<m; i++) {
02657       A[i+j*m] = FadType(ndot, urand.number());
02658       for (unsigned int k=0; k<ndot; k++)
02659         A[i+j*m].fastAccessDx(k) = urand.number();
02660     }
02661   }
02662   for (unsigned int j=0; j<n; j++) {
02663     for (unsigned int i=0; i<l; i++) {
02664       B[i+j*l] = FadType(ndot, urand.number());
02665       for (unsigned int k=0; k<ndot; k++)
02666   B[i+j*l].fastAccessDx(k) = urand.number();
02667     }
02668   }
02669   ScalarType alpha = urand.number();
02670   ScalarType beta = urand.number();
02671 
02672   for (unsigned int j=0; j<n; j++) {
02673     for (unsigned int i=0; i<m; i++) {
02674       ScalarType val = urand.number();
02675       C1[i+j*m] = FadType(ndot, val);
02676       C2[i+j*m] = FadType(ndot, val);
02677       C3[i+j*m] = FadType(ndot, val);
02678       for (unsigned int k=0; k<ndot; k++) {
02679   val = urand.number();
02680   C1[i+j*m].fastAccessDx(k) = val;
02681   C2[i+j*m].fastAccessDx(k) = val;
02682   C3[i+j*m].fastAccessDx(k) = val;
02683       }
02684     }
02685   }
02686   
02687   Teuchos::BLAS<int,FadType> teuchos_blas;
02688   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02689         &A[0], m, &B[0], l, beta, &C1[0], m);
02690 
02691   Teuchos::BLAS<int,FadType> sacado_blas(false);
02692   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02693         &A[0], m, &B[0], l, beta, &C2[0], m);
02694 
02695   COMPARE_FAD_VECTORS(C1, C2, m*n);
02696 
02697   unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
02698   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02699   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02700         &A[0], m, &B[0], l, beta, &C3[0], m);
02701 
02702   COMPARE_FAD_VECTORS(C1, C3, m*n);
02703 
02704   // transa
02705   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02706         &A[0], l, &B[0], l, beta, &C1[0], m);
02707   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02708         &A[0], l, &B[0], l, beta, &C2[0], m);
02709   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02710         &A[0], l, &B[0], l, beta, &C3[0], m);
02711 
02712   COMPARE_FAD_VECTORS(C1, C2, m*n);
02713   COMPARE_FAD_VECTORS(C1, C3, m*n);
02714 
02715   // transb
02716   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02717         &A[0], m, &B[0], n, beta, &C1[0], m);
02718   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02719         &A[0], m, &B[0], n, beta, &C2[0], m);
02720   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02721         &A[0], m, &B[0], n, beta, &C3[0], m);
02722 
02723   COMPARE_FAD_VECTORS(C1, C2, m*n);
02724   COMPARE_FAD_VECTORS(C1, C3, m*n);
02725 
02726   // transa and transb
02727   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02728         &A[0], l, &B[0], n, beta, &C1[0], m);
02729   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02730         &A[0], l, &B[0], n, beta, &C2[0], m);
02731   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02732         &A[0], l, &B[0], n, beta, &C3[0], m);
02733 
02734   COMPARE_FAD_VECTORS(C1, C2, m*n);
02735   COMPARE_FAD_VECTORS(C1, C3, m*n);
02736 }
02737 
02738 // Tests with constant A
02739 template <class FadType, class ScalarType>
02740 void
02741 FadBLASUnitTests<FadType,ScalarType>::
02742 testGEMM8() {
02743   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
02744     C4(m*n,ndot), C5(m*n,ndot);
02745   std::vector<ScalarType> a(m*l);
02746   for (unsigned int j=0; j<l; j++) {
02747     for (unsigned int i=0; i<m; i++) {
02748       a[i+j*m] = urand.number();
02749       A[i+j*m] = a[i+j*m];
02750     }
02751   }
02752   for (unsigned int j=0; j<n; j++) {
02753     for (unsigned int i=0; i<l; i++) {
02754       B[i+j*l] = FadType(ndot, urand.number());
02755       for (unsigned int k=0; k<ndot; k++)
02756   B[i+j*l].fastAccessDx(k) = urand.number();
02757     }
02758   }
02759   FadType alpha(ndot, urand.number());
02760   FadType beta(ndot, urand.number());
02761   for (unsigned int k=0; k<ndot; k++) {
02762     alpha.fastAccessDx(k) = urand.number();
02763     beta.fastAccessDx(k) = urand.number();
02764   }
02765 
02766   for (unsigned int j=0; j<n; j++) {
02767     for (unsigned int i=0; i<m; i++) {
02768       ScalarType val = urand.number();
02769       C1[i+j*m] = FadType(ndot, val);
02770       C2[i+j*m] = FadType(ndot, val);
02771       C3[i+j*m] = FadType(ndot, val);
02772       C4[i+j*m] = FadType(ndot, val);
02773       C5[i+j*m] = FadType(ndot, val);
02774       for (unsigned int k=0; k<ndot; k++) {
02775   val = urand.number();
02776   C1[i+j*m].fastAccessDx(k) = val;
02777   C2[i+j*m].fastAccessDx(k) = val;
02778   C3[i+j*m].fastAccessDx(k) = val;
02779   C4[i+j*m].fastAccessDx(k) = val;
02780   C5[i+j*m].fastAccessDx(k) = val;
02781       }
02782     }
02783   }
02784   
02785   Teuchos::BLAS<int,FadType> teuchos_blas;
02786   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02787         &A[0], m, &B[0], l, beta, &C1[0], m);
02788 
02789   Teuchos::BLAS<int,FadType> sacado_blas(false);
02790   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02791         &A[0], m, &B[0], l, beta, &C2[0], m);
02792 
02793   COMPARE_FAD_VECTORS(C1, C2, m*n);
02794 
02795   unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
02796   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02797   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02798         &A[0], m, &B[0], l, beta, &C3[0], m);
02799 
02800   COMPARE_FAD_VECTORS(C1, C3, m*n);
02801 
02802   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02803         &a[0], m, &B[0], l, beta, &C4[0], m);
02804 
02805   COMPARE_FAD_VECTORS(C1, C4, m*n);
02806 
02807   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02808         &a[0], m, &B[0], l, beta, &C5[0], m);
02809 
02810   COMPARE_FAD_VECTORS(C1, C5, m*n);
02811 
02812   // transa
02813   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02814         &A[0], l, &B[0], l, beta, &C1[0], m);
02815   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02816         &A[0], l, &B[0], l, beta, &C2[0], m);
02817   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02818         &A[0], l, &B[0], l, beta, &C3[0], m);
02819   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02820         &a[0], l, &B[0], l, beta, &C4[0], m);
02821   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02822         &a[0], l, &B[0], l, beta, &C5[0], m);
02823 
02824   COMPARE_FAD_VECTORS(C1, C2, m*n);
02825   COMPARE_FAD_VECTORS(C1, C3, m*n);
02826   COMPARE_FAD_VECTORS(C1, C4, m*n);
02827   COMPARE_FAD_VECTORS(C1, C5, m*n);
02828 
02829   // transb
02830   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02831         &A[0], m, &B[0], n, beta, &C1[0], m);
02832   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02833         &A[0], m, &B[0], n, beta, &C2[0], m);
02834   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02835         &A[0], m, &B[0], n, beta, &C3[0], m);
02836   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02837         &a[0], m, &B[0], n, beta, &C4[0], m);
02838   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02839         &a[0], m, &B[0], n, beta, &C5[0], m);
02840 
02841   COMPARE_FAD_VECTORS(C1, C2, m*n);
02842   COMPARE_FAD_VECTORS(C1, C3, m*n);
02843   COMPARE_FAD_VECTORS(C1, C4, m*n);
02844   COMPARE_FAD_VECTORS(C1, C5, m*n);
02845 
02846   // transa and transb
02847   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02848         &A[0], l, &B[0], n, beta, &C1[0], m);
02849   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02850         &A[0], l, &B[0], n, beta, &C2[0], m);
02851   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02852         &A[0], l, &B[0], n, beta, &C3[0], m);
02853   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02854         &a[0], l, &B[0], n, beta, &C4[0], m);
02855   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02856         &a[0], l, &B[0], n, beta, &C5[0], m);
02857 
02858   COMPARE_FAD_VECTORS(C1, C2, m*n);
02859   COMPARE_FAD_VECTORS(C1, C3, m*n);
02860   COMPARE_FAD_VECTORS(C1, C4, m*n);
02861   COMPARE_FAD_VECTORS(C1, C5, m*n);
02862 }
02863 
02864 // Tests with constant B
02865 template <class FadType, class ScalarType>
02866 void
02867 FadBLASUnitTests<FadType,ScalarType>::
02868 testGEMM9() {
02869   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
02870     C4(m*n,ndot), C5(m*n,ndot);
02871   std::vector<ScalarType> b(l*n);
02872   for (unsigned int j=0; j<l; j++) {
02873     for (unsigned int i=0; i<m; i++) {
02874       A[i+j*m] = FadType(ndot, urand.number());
02875       for (unsigned int k=0; k<ndot; k++)
02876         A[i+j*m].fastAccessDx(k) = urand.number();
02877     }
02878   }
02879   for (unsigned int j=0; j<n; j++) {
02880     for (unsigned int i=0; i<l; i++) {
02881       b[i+j*l] = urand.number();
02882       B[i+j*l] = b[i+j*l];
02883     }
02884   }
02885   FadType alpha(ndot, urand.number());
02886   FadType beta(ndot, urand.number());
02887   for (unsigned int k=0; k<ndot; k++) {
02888     alpha.fastAccessDx(k) = urand.number();
02889     beta.fastAccessDx(k) = urand.number();
02890   }
02891 
02892   for (unsigned int j=0; j<n; j++) {
02893     for (unsigned int i=0; i<m; i++) {
02894       ScalarType val = urand.number();
02895       C1[i+j*m] = FadType(ndot, val);
02896       C2[i+j*m] = FadType(ndot, val);
02897       C3[i+j*m] = FadType(ndot, val);
02898       C4[i+j*m] = FadType(ndot, val);
02899       C5[i+j*m] = FadType(ndot, val);
02900       for (unsigned int k=0; k<ndot; k++) {
02901       val = urand.number();
02902       C1[i+j*m].fastAccessDx(k) = val;
02903       C2[i+j*m].fastAccessDx(k) = val;
02904       C3[i+j*m].fastAccessDx(k) = val;
02905       C4[i+j*m].fastAccessDx(k) = val;
02906       C5[i+j*m].fastAccessDx(k) = val;
02907       } 
02908     }
02909   }
02910   
02911   Teuchos::BLAS<int,FadType> teuchos_blas;
02912   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02913         &A[0], m, &B[0], l, beta, &C1[0], m);
02914 
02915   Teuchos::BLAS<int,FadType> sacado_blas(false);
02916   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02917         &A[0], m, &B[0], l, beta, &C2[0], m);
02918 
02919   COMPARE_FAD_VECTORS(C1, C2, m*n);
02920 
02921   unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
02922   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
02923   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02924         &A[0], m, &B[0], l, beta, &C3[0], m);
02925 
02926   COMPARE_FAD_VECTORS(C1, C3, m*n);
02927 
02928   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02929         &A[0], m, &b[0], l, beta, &C4[0], m);
02930 
02931   COMPARE_FAD_VECTORS(C1, C4, m*n);
02932 
02933   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02934         &A[0], m, &b[0], l, beta, &C5[0], m);
02935 
02936   COMPARE_FAD_VECTORS(C1, C5, m*n);
02937 
02938   // transa
02939   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02940         &A[0], l, &B[0], l, beta, &C1[0], m);
02941   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02942         &A[0], l, &B[0], l, beta, &C2[0], m);
02943   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02944         &A[0], l, &B[0], l, beta, &C3[0], m);
02945   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02946         &A[0], l, &b[0], l, beta, &C4[0], m);
02947   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
02948         &A[0], l, &b[0], l, beta, &C5[0], m);
02949 
02950   COMPARE_FAD_VECTORS(C1, C2, m*n);
02951   COMPARE_FAD_VECTORS(C1, C3, m*n);
02952   COMPARE_FAD_VECTORS(C1, C4, m*n);
02953   COMPARE_FAD_VECTORS(C1, C5, m*n);
02954 
02955   // transb
02956   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02957         &A[0], m, &B[0], n, beta, &C1[0], m);
02958   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02959         &A[0], m, &B[0], n, beta, &C2[0], m);
02960   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02961         &A[0], m, &B[0], n, beta, &C3[0], m);
02962   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02963         &A[0], m, &b[0], n, beta, &C4[0], m);
02964   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
02965         &A[0], m, &b[0], n, beta, &C5[0], m);
02966 
02967   COMPARE_FAD_VECTORS(C1, C2, m*n);
02968   COMPARE_FAD_VECTORS(C1, C3, m*n);
02969   COMPARE_FAD_VECTORS(C1, C4, m*n);
02970   COMPARE_FAD_VECTORS(C1, C5, m*n);
02971 
02972   // transa and transb
02973   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02974         &A[0], l, &B[0], n, beta, &C1[0], m);
02975   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02976         &A[0], l, &B[0], n, beta, &C2[0], m);
02977   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02978         &A[0], l, &B[0], n, beta, &C3[0], m);
02979   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02980         &A[0], l, &b[0], n, beta, &C4[0], m);
02981   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
02982         &A[0], l, &b[0], n, beta, &C5[0], m);
02983 
02984   COMPARE_FAD_VECTORS(C1, C2, m*n);
02985   COMPARE_FAD_VECTORS(C1, C3, m*n);
02986   COMPARE_FAD_VECTORS(C1, C4, m*n);
02987   COMPARE_FAD_VECTORS(C1, C5, m*n);
02988 }
02989 
02990 // Tests with constant A and B
02991 template <class FadType, class ScalarType>
02992 void
02993 FadBLASUnitTests<FadType,ScalarType>::
02994 testGEMM10() {
02995   VectorType A(m*l,ndot), B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
02996     C4(m*n,ndot), C5(m*n,ndot);
02997   std::vector<ScalarType> a(m*l), b(l*n);
02998   for (unsigned int j=0; j<l; j++) {
02999     for (unsigned int i=0; i<m; i++) {
03000       a[i+j*m] = urand.number();
03001       A[i+j*m] = a[i+j*m];
03002     }
03003   }
03004   for (unsigned int j=0; j<n; j++) {
03005     for (unsigned int i=0; i<l; i++) {
03006       b[i+j*l] = urand.number();
03007       B[i+j*l] = b[i+j*l];
03008     }
03009   }
03010   FadType alpha(ndot, urand.number());
03011   FadType beta(ndot, urand.number());
03012   for (unsigned int k=0; k<ndot; k++) {
03013     alpha.fastAccessDx(k) = urand.number();
03014     beta.fastAccessDx(k) = urand.number();
03015   }
03016 
03017   for (unsigned int j=0; j<n; j++) {
03018     for (unsigned int i=0; i<m; i++) {
03019       ScalarType val = urand.number();
03020       C1[i+j*m] = FadType(ndot, val);
03021       C2[i+j*m] = FadType(ndot, val);
03022       C3[i+j*m] = FadType(ndot, val);
03023       C4[i+j*m] = FadType(ndot, val);
03024       C5[i+j*m] = FadType(ndot, val);
03025       for (unsigned int k=0; k<ndot; k++) {
03026       val = urand.number();
03027       C1[i+j*m].fastAccessDx(k) = val;
03028       C2[i+j*m].fastAccessDx(k) = val;
03029       C3[i+j*m].fastAccessDx(k) = val;
03030       C4[i+j*m].fastAccessDx(k) = val;
03031       C5[i+j*m].fastAccessDx(k) = val;
03032       } 
03033     }
03034   }
03035   
03036   Teuchos::BLAS<int,FadType> teuchos_blas;
03037   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03038         &A[0], m, &B[0], l, beta, &C1[0], m);
03039 
03040   Teuchos::BLAS<int,FadType> sacado_blas(false);
03041   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03042         &A[0], m, &B[0], l, beta, &C2[0], m);
03043 
03044   COMPARE_FAD_VECTORS(C1, C2, m*n);
03045 
03046   unsigned int sz = m*l + l*n + m*n*(1+ndot);
03047   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03048   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03049         &A[0], m, &B[0], l, beta, &C3[0], m);
03050 
03051   COMPARE_FAD_VECTORS(C1, C3, m*n);
03052 
03053   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03054         &a[0], m, &b[0], l, beta, &C4[0], m);
03055 
03056   COMPARE_FAD_VECTORS(C1, C4, m*n);
03057 
03058   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03059         &a[0], m, &b[0], l, beta, &C5[0], m);
03060 
03061   COMPARE_FAD_VECTORS(C1, C5, m*n);
03062 
03063   // transa
03064   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03065         &A[0], l, &B[0], l, beta, &C1[0], m);
03066   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03067         &A[0], l, &B[0], l, beta, &C2[0], m);
03068   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03069         &A[0], l, &B[0], l, beta, &C3[0], m);
03070   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03071         &a[0], l, &b[0], l, beta, &C4[0], m);
03072   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha, 
03073         &a[0], l, &b[0], l, beta, &C5[0], m);
03074 
03075   COMPARE_FAD_VECTORS(C1, C2, m*n);
03076   COMPARE_FAD_VECTORS(C1, C3, m*n);
03077   COMPARE_FAD_VECTORS(C1, C4, m*n);
03078   COMPARE_FAD_VECTORS(C1, C5, m*n);
03079 
03080   // transb
03081   teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
03082         &A[0], m, &B[0], n, beta, &C1[0], m);
03083   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
03084         &A[0], m, &B[0], n, beta, &C2[0], m);
03085   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
03086         &A[0], m, &B[0], n, beta, &C3[0], m);
03087   sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
03088         &a[0], m, &b[0], n, beta, &C4[0], m);
03089   sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha, 
03090         &a[0], m, &b[0], n, beta, &C5[0], m);
03091 
03092   COMPARE_FAD_VECTORS(C1, C2, m*n);
03093   COMPARE_FAD_VECTORS(C1, C3, m*n);
03094   COMPARE_FAD_VECTORS(C1, C4, m*n);
03095   COMPARE_FAD_VECTORS(C1, C5, m*n);
03096 
03097   // transa and transb
03098   teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
03099         &A[0], l, &B[0], n, beta, &C1[0], m);
03100   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
03101         &A[0], l, &B[0], n, beta, &C2[0], m);
03102   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
03103         &A[0], l, &B[0], n, beta, &C3[0], m);
03104   sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
03105         &a[0], l, &b[0], n, beta, &C4[0], m);
03106   sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha, 
03107         &a[0], l, &b[0], n, beta, &C5[0], m);
03108 
03109   COMPARE_FAD_VECTORS(C1, C2, m*n);
03110   COMPARE_FAD_VECTORS(C1, C3, m*n);
03111   COMPARE_FAD_VECTORS(C1, C4, m*n);
03112   COMPARE_FAD_VECTORS(C1, C5, m*n);
03113 }
03114 
03115 // Tests all arguments, left side
03116 template <class FadType, class ScalarType>
03117 void
03118 FadBLASUnitTests<FadType,ScalarType>::
03119 testSYMM1() {
03120 
03121   // SYMM is apparently not defined in the BLAS for complex types
03122   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03123     return; 
03124 
03125   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
03126   for (unsigned int j=0; j<m; j++) {
03127     for (unsigned int i=0; i<m; i++) {
03128       A[i+j*m] = FadType(ndot, urand.number());
03129       for (unsigned int k=0; k<ndot; k++)
03130         A[i+j*m].fastAccessDx(k) = urand.number();
03131     }
03132   }
03133   for (unsigned int j=0; j<n; j++) {
03134     for (unsigned int i=0; i<m; i++) {
03135       B[i+j*m] = FadType(ndot, urand.number());
03136       for (unsigned int k=0; k<ndot; k++)
03137   B[i+j*m].fastAccessDx(k) = urand.number();
03138     }
03139   }
03140   FadType alpha(ndot, urand.number());
03141   FadType beta(ndot, urand.number());
03142   for (unsigned int k=0; k<ndot; k++) {
03143     alpha.fastAccessDx(k) = urand.number();
03144     beta.fastAccessDx(k) = urand.number();
03145   }
03146 
03147   for (unsigned int j=0; j<n; j++) {
03148     for (unsigned int i=0; i<m; i++) {
03149       ScalarType val = urand.number();
03150       C1[i+j*m] = FadType(ndot, val);
03151       C2[i+j*m] = FadType(ndot, val);
03152       C3[i+j*m] = FadType(ndot, val);
03153       for (unsigned int k=0; k<ndot; k++) {
03154   val = urand.number();
03155   C1[i+j*m].fastAccessDx(k) = val;
03156   C2[i+j*m].fastAccessDx(k) = val;
03157   C3[i+j*m].fastAccessDx(k) = val;
03158       } 
03159     }
03160   }
03161   
03162   Teuchos::BLAS<int,FadType> teuchos_blas;
03163   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03164         &A[0], m, &B[0], m, beta, &C1[0], m);
03165 
03166   Teuchos::BLAS<int,FadType> sacado_blas(false);
03167   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03168         &A[0], m, &B[0], m, beta, &C2[0], m);
03169 
03170   COMPARE_FAD_VECTORS(C1, C2, m*n);
03171 
03172   unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
03173   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03174   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03175         &A[0], m, &B[0], m, beta, &C3[0], m);
03176 
03177   COMPARE_FAD_VECTORS(C1, C3, m*n);
03178 
03179   // lower tri
03180   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03181         &A[0], m, &B[0], m, beta, &C1[0], m);
03182   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03183         &A[0], m, &B[0], m, beta, &C2[0], m);
03184   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03185         &A[0], m, &B[0], m, beta, &C3[0], m);
03186 
03187   COMPARE_FAD_VECTORS(C1, C2, m*n);
03188   COMPARE_FAD_VECTORS(C1, C3, m*n);
03189 }
03190 
03191 // Tests all arguments, right side
03192 template <class FadType, class ScalarType>
03193 void
03194 FadBLASUnitTests<FadType,ScalarType>::
03195 testSYMM2() {
03196 
03197   // SYMM is apparently not defined in the BLAS for complex types
03198   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03199     return; 
03200 
03201   VectorType A(n*n,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
03202   for (unsigned int j=0; j<n; j++) {
03203     for (unsigned int i=0; i<n; i++) {
03204       A[i+j*n] = FadType(ndot, urand.number());
03205       for (unsigned int k=0; k<ndot; k++)
03206         A[i+j*n].fastAccessDx(k) = urand.number();
03207     }
03208   }
03209   for (unsigned int j=0; j<n; j++) {
03210     for (unsigned int i=0; i<m; i++) {
03211       B[i+j*m] = FadType(ndot, urand.number());
03212       for (unsigned int k=0; k<ndot; k++)
03213   B[i+j*m].fastAccessDx(k) = urand.number();
03214     }
03215   }
03216   FadType alpha(ndot, urand.number());
03217   FadType beta(ndot, urand.number());
03218   for (unsigned int k=0; k<ndot; k++) {
03219     alpha.fastAccessDx(k) = urand.number();
03220     beta.fastAccessDx(k) = urand.number();
03221   }
03222 
03223   for (unsigned int j=0; j<n; j++) {
03224     for (unsigned int i=0; i<m; i++) {
03225       ScalarType val = urand.number();
03226       C1[i+j*m] = FadType(ndot, val);
03227       C2[i+j*m] = FadType(ndot, val);
03228       C3[i+j*m] = FadType(ndot, val);
03229       for (unsigned int k=0; k<ndot; k++) {
03230   val = urand.number();
03231   C1[i+j*m].fastAccessDx(k) = val;
03232   C2[i+j*m].fastAccessDx(k) = val;
03233   C3[i+j*m].fastAccessDx(k) = val;
03234       } 
03235     }
03236   }
03237   
03238   Teuchos::BLAS<int,FadType> teuchos_blas;
03239   teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03240         &A[0], n, &B[0], m, beta, &C1[0], m);
03241 
03242   Teuchos::BLAS<int,FadType> sacado_blas(false);
03243   sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03244         &A[0], n, &B[0], m, beta, &C2[0], m);
03245 
03246   COMPARE_FAD_VECTORS(C1, C2, m*n);
03247 
03248   unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
03249   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03250   sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03251         &A[0], n, &B[0], m, beta, &C3[0], m);
03252 
03253   COMPARE_FAD_VECTORS(C1, C3, m*n);
03254 
03255   // lower tri
03256   teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03257         &A[0], n, &B[0], m, beta, &C1[0], m);
03258   sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03259         &A[0], n, &B[0], m, beta, &C2[0], m);
03260   sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03261         &A[0], n, &B[0], m, beta, &C3[0], m);
03262 
03263   COMPARE_FAD_VECTORS(C1, C2, m*n);
03264   COMPARE_FAD_VECTORS(C1, C3, m*n);
03265 }
03266 
03267 // Tests different lda, ldb, ldc, left side
03268 template <class FadType, class ScalarType>
03269 void
03270 FadBLASUnitTests<FadType,ScalarType>::
03271 testSYMM3() {
03272 
03273   // SYMM is apparently not defined in the BLAS for complex types
03274   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03275     return; 
03276 
03277   unsigned int lda = m+4;
03278   unsigned int ldb = m+5;
03279   unsigned int ldc = m+6;
03280   VectorType A(lda*m,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
03281     C3(ldc*n,ndot);
03282   for (unsigned int j=0; j<m; j++) {
03283     for (unsigned int i=0; i<lda; i++) {
03284       A[i+j*lda] = FadType(ndot, urand.number());
03285       for (unsigned int k=0; k<ndot; k++)
03286         A[i+j*lda].fastAccessDx(k) = urand.number();
03287     }
03288   }
03289   for (unsigned int j=0; j<n; j++) {
03290     for (unsigned int i=0; i<ldb; i++) {
03291       B[i+j*ldb] = FadType(ndot, urand.number());
03292       for (unsigned int k=0; k<ndot; k++)
03293   B[i+j*ldb].fastAccessDx(k) = urand.number();
03294     }
03295   }
03296   FadType alpha(ndot, urand.number());
03297   FadType beta(ndot, urand.number());
03298   for (unsigned int k=0; k<ndot; k++) {
03299     alpha.fastAccessDx(k) = urand.number();
03300     beta.fastAccessDx(k) = urand.number();
03301   }
03302 
03303   for (unsigned int j=0; j<n; j++) {
03304     for (unsigned int i=0; i<ldc; i++) {
03305       ScalarType val = urand.number();
03306       C1[i+j*ldc] = FadType(ndot, val);
03307       C2[i+j*ldc] = FadType(ndot, val);
03308       C3[i+j*ldc] = FadType(ndot, val);
03309       for (unsigned int k=0; k<ndot; k++) {
03310   val = urand.number();
03311   C1[i+j*ldc].fastAccessDx(k) = val;
03312   C2[i+j*ldc].fastAccessDx(k) = val;
03313   C3[i+j*ldc].fastAccessDx(k) = val;
03314       } 
03315     }
03316   }
03317   
03318   Teuchos::BLAS<int,FadType> teuchos_blas;
03319   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03320         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
03321 
03322   Teuchos::BLAS<int,FadType> sacado_blas(false);
03323   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03324         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
03325 
03326   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
03327 
03328   unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
03329   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03330   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03331         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
03332 
03333   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
03334 
03335   // lower tri
03336   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03337         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
03338   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03339         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
03340   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03341         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
03342 
03343   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
03344   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
03345 }
03346 
03347 // Tests different lda, ldb, ldc, right side
03348 template <class FadType, class ScalarType>
03349 void
03350 FadBLASUnitTests<FadType,ScalarType>::
03351 testSYMM4() {
03352 
03353   // SYMM is apparently not defined in the BLAS for complex types
03354   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03355     return; 
03356 
03357   unsigned int lda = n+4;
03358   unsigned int ldb = m+5;
03359   unsigned int ldc = m+6;
03360   VectorType A(lda*n,ndot), B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot), 
03361     C3(ldc*n,ndot);
03362   for (unsigned int j=0; j<n; j++) {
03363     for (unsigned int i=0; i<lda; i++) {
03364       A[i+j*lda] = FadType(ndot, urand.number());
03365       for (unsigned int k=0; k<ndot; k++)
03366         A[i+j*lda].fastAccessDx(k) = urand.number();
03367     }
03368   }
03369   for (unsigned int j=0; j<n; j++) {
03370     for (unsigned int i=0; i<ldb; i++) {
03371       B[i+j*ldb] = FadType(ndot, urand.number());
03372       for (unsigned int k=0; k<ndot; k++)
03373   B[i+j*ldb].fastAccessDx(k) = urand.number();
03374     }
03375   }
03376   FadType alpha(ndot, urand.number());
03377   FadType beta(ndot, urand.number());
03378   for (unsigned int k=0; k<ndot; k++) {
03379     alpha.fastAccessDx(k) = urand.number();
03380     beta.fastAccessDx(k) = urand.number();
03381   }
03382 
03383   for (unsigned int j=0; j<n; j++) {
03384     for (unsigned int i=0; i<ldc; i++) {
03385       ScalarType val = urand.number();
03386       C1[i+j*ldc] = FadType(ndot, val);
03387       C2[i+j*ldc] = FadType(ndot, val);
03388       C3[i+j*ldc] = FadType(ndot, val);
03389       for (unsigned int k=0; k<ndot; k++) {
03390   val = urand.number();
03391   C1[i+j*ldc].fastAccessDx(k) = val;
03392   C2[i+j*ldc].fastAccessDx(k) = val;
03393   C3[i+j*ldc].fastAccessDx(k) = val;
03394       } 
03395     }
03396   }
03397   
03398   Teuchos::BLAS<int,FadType> teuchos_blas;
03399   teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03400         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
03401 
03402   Teuchos::BLAS<int,FadType> sacado_blas(false);
03403   sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03404         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
03405 
03406   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
03407 
03408   unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
03409   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03410   sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03411         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
03412 
03413   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
03414 
03415   // lower tri
03416   teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03417         &A[0], lda, &B[0], ldb, beta, &C1[0], ldc);
03418   sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03419         &A[0], lda, &B[0], ldb, beta, &C2[0], ldc);
03420   sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03421         &A[0], lda, &B[0], ldb, beta, &C3[0], ldc);
03422 
03423   COMPARE_FAD_VECTORS(C1, C2, ldc*n);
03424   COMPARE_FAD_VECTORS(C1, C3, ldc*n);
03425 }
03426 
03427 // Tests with constant C
03428 template <class FadType, class ScalarType>
03429 void
03430 FadBLASUnitTests<FadType,ScalarType>::
03431 testSYMM5() {
03432 
03433   // SYMM is apparently not defined in the BLAS for complex types
03434   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03435     return; 
03436 
03437   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
03438   for (unsigned int j=0; j<m; j++) {
03439     for (unsigned int i=0; i<m; i++) {
03440       A[i+j*m] = FadType(ndot, urand.number());
03441       for (unsigned int k=0; k<ndot; k++)
03442         A[i+j*m].fastAccessDx(k) = urand.number();
03443     }
03444   }
03445   for (unsigned int j=0; j<n; j++) {
03446     for (unsigned int i=0; i<m; i++) {
03447       B[i+j*m] = FadType(ndot, urand.number());
03448       for (unsigned int k=0; k<ndot; k++)
03449   B[i+j*m].fastAccessDx(k) = urand.number();
03450     }
03451   }
03452   FadType alpha(ndot, urand.number());
03453   FadType beta(ndot, urand.number());
03454   for (unsigned int k=0; k<ndot; k++) {
03455     alpha.fastAccessDx(k) = urand.number();
03456     beta.fastAccessDx(k) = urand.number();
03457   }
03458 
03459   for (unsigned int j=0; j<n; j++) {
03460     for (unsigned int i=0; i<m; i++) {
03461       ScalarType val = urand.number();
03462       C1[i+j*m] = val;
03463       C2[i+j*m] = val;
03464       C3[i+j*m] = val;
03465     }
03466   }
03467   
03468   Teuchos::BLAS<int,FadType> teuchos_blas;
03469   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03470         &A[0], m, &B[0], m, beta, &C1[0], m);
03471 
03472   Teuchos::BLAS<int,FadType> sacado_blas(false);
03473   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03474         &A[0], m, &B[0], m, beta, &C2[0], m);
03475 
03476   COMPARE_FAD_VECTORS(C1, C2, m*n);
03477 
03478   unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
03479   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03480   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03481         &A[0], m, &B[0], m, beta, &C3[0], m);
03482 
03483   COMPARE_FAD_VECTORS(C1, C3, m*n);
03484 
03485   // lower tri
03486   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03487         &A[0], m, &B[0], m, beta, &C1[0], m);
03488   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03489         &A[0], m, &B[0], m, beta, &C2[0], m);
03490   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03491         &A[0], m, &B[0], m, beta, &C3[0], m);
03492 
03493   COMPARE_FAD_VECTORS(C1, C2, m*n);
03494   COMPARE_FAD_VECTORS(C1, C3, m*n);
03495 }
03496 
03497 // Tests with constant alpha, beta
03498 template <class FadType, class ScalarType>
03499 void
03500 FadBLASUnitTests<FadType,ScalarType>::
03501 testSYMM6() {
03502 
03503   // SYMM is apparently not defined in the BLAS for complex types
03504   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03505     return; 
03506 
03507   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
03508   for (unsigned int j=0; j<m; j++) {
03509     for (unsigned int i=0; i<m; i++) {
03510       A[i+j*m] = FadType(ndot, urand.number());
03511       for (unsigned int k=0; k<ndot; k++)
03512         A[i+j*m].fastAccessDx(k) = urand.number();
03513     }
03514   }
03515   for (unsigned int j=0; j<n; j++) {
03516     for (unsigned int i=0; i<m; i++) {
03517       B[i+j*m] = FadType(ndot, urand.number());
03518       for (unsigned int k=0; k<ndot; k++)
03519   B[i+j*m].fastAccessDx(k) = urand.number();
03520     }
03521   }
03522   ScalarType alpha = urand.number();
03523   ScalarType beta = urand.number();
03524 
03525   for (unsigned int j=0; j<n; j++) {
03526     for (unsigned int i=0; i<m; i++) {
03527       ScalarType val = urand.number();
03528       C1[i+j*m] = FadType(ndot, val);
03529       C2[i+j*m] = FadType(ndot, val);
03530       C3[i+j*m] = FadType(ndot, val);
03531       for (unsigned int k=0; k<ndot; k++) {
03532   val = urand.number();
03533   C1[i+j*m].fastAccessDx(k) = val;
03534   C2[i+j*m].fastAccessDx(k) = val;
03535   C3[i+j*m].fastAccessDx(k) = val;
03536       } 
03537     }
03538   }
03539   
03540   Teuchos::BLAS<int,FadType> teuchos_blas;
03541   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03542         &A[0], m, &B[0], m, beta, &C1[0], m);
03543 
03544   Teuchos::BLAS<int,FadType> sacado_blas(false);
03545   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03546         &A[0], m, &B[0], m, beta, &C2[0], m);
03547 
03548   COMPARE_FAD_VECTORS(C1, C2, m*n);
03549 
03550   unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
03551   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03552   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03553         &A[0], m, &B[0], m, beta, &C3[0], m);
03554 
03555   COMPARE_FAD_VECTORS(C1, C3, m*n);
03556 
03557   // lower tri
03558   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03559         &A[0], m, &B[0], m, beta, &C1[0], m);
03560   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03561         &A[0], m, &B[0], m, beta, &C2[0], m);
03562   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03563         &A[0], m, &B[0], m, beta, &C3[0], m);
03564 
03565   COMPARE_FAD_VECTORS(C1, C2, m*n);
03566   COMPARE_FAD_VECTORS(C1, C3, m*n);
03567 }
03568 
03569 // Tests with constant A
03570 template <class FadType, class ScalarType>
03571 void
03572 FadBLASUnitTests<FadType,ScalarType>::
03573 testSYMM7() {
03574 
03575   // SYMM is apparently not defined in the BLAS for complex types
03576   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03577     return; 
03578 
03579   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
03580     C4(m*n,ndot), C5(m*n,ndot);
03581   std::vector<ScalarType> a(m*m);
03582   for (unsigned int j=0; j<m; j++) {
03583     for (unsigned int i=0; i<m; i++) {
03584       a[i+j*m] = urand.number();
03585       A[i+j*m] = a[i+j*m];
03586     }
03587   }
03588   for (unsigned int j=0; j<n; j++) {
03589     for (unsigned int i=0; i<m; i++) {
03590       B[i+j*m] = FadType(ndot, urand.number());
03591       for (unsigned int k=0; k<ndot; k++)
03592   B[i+j*m].fastAccessDx(k) = urand.number();
03593     }
03594   }
03595   FadType alpha(ndot, urand.number());
03596   FadType beta(ndot, urand.number());
03597   for (unsigned int k=0; k<ndot; k++) {
03598     alpha.fastAccessDx(k) = urand.number();
03599     beta.fastAccessDx(k) = urand.number();
03600   }
03601 
03602   for (unsigned int j=0; j<n; j++) {
03603     for (unsigned int i=0; i<m; i++) {
03604       ScalarType val = urand.number();
03605       C1[i+j*m] = FadType(ndot, val);
03606       C2[i+j*m] = FadType(ndot, val);
03607       C3[i+j*m] = FadType(ndot, val);
03608       C4[i+j*m] = FadType(ndot, val);
03609       C5[i+j*m] = FadType(ndot, val);
03610       for (unsigned int k=0; k<ndot; k++) {
03611   val = urand.number();
03612   C1[i+j*m].fastAccessDx(k) = val;
03613   C2[i+j*m].fastAccessDx(k) = val;
03614   C3[i+j*m].fastAccessDx(k) = val;
03615   C4[i+j*m].fastAccessDx(k) = val;
03616   C5[i+j*m].fastAccessDx(k) = val;
03617       } 
03618     }
03619   }
03620   
03621   Teuchos::BLAS<int,FadType> teuchos_blas;
03622   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03623         &A[0], m, &B[0], m, beta, &C1[0], m);
03624 
03625   Teuchos::BLAS<int,FadType> sacado_blas(false);
03626   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03627         &A[0], m, &B[0], m, beta, &C2[0], m);
03628 
03629   COMPARE_FAD_VECTORS(C1, C2, m*n);
03630 
03631   unsigned int sz = m*m + 2*m*n*(1+ndot);
03632   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03633   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03634         &A[0], m, &B[0], m, beta, &C3[0], m);
03635 
03636   COMPARE_FAD_VECTORS(C1, C3, m*n);
03637 
03638   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03639         &a[0], m, &B[0], m, beta, &C4[0], m);
03640 
03641   COMPARE_FAD_VECTORS(C1, C4, m*n);
03642 
03643   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03644         &a[0], m, &B[0], m, beta, &C5[0], m);
03645 
03646   COMPARE_FAD_VECTORS(C1, C5, m*n);
03647 
03648   // lower tri
03649   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03650         &A[0], m, &B[0], m, beta, &C1[0], m);
03651   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03652         &A[0], m, &B[0], m, beta, &C2[0], m);
03653   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03654         &A[0], m, &B[0], m, beta, &C3[0], m);
03655   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03656         &a[0], m, &B[0], m, beta, &C4[0], m);
03657   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03658         &a[0], m, &B[0], m, beta, &C5[0], m);
03659 
03660   COMPARE_FAD_VECTORS(C1, C2, m*n);
03661   COMPARE_FAD_VECTORS(C1, C3, m*n);
03662   COMPARE_FAD_VECTORS(C1, C4, m*n);
03663   COMPARE_FAD_VECTORS(C1, C5, m*n);
03664 }
03665 
03666 // Tests with constant B
03667 template <class FadType, class ScalarType>
03668 void
03669 FadBLASUnitTests<FadType,ScalarType>::
03670 testSYMM8() {
03671 
03672   // SYMM is apparently not defined in the BLAS for complex types
03673   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03674     return; 
03675 
03676   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
03677     C4(m*n,ndot), C5(m*n,ndot);
03678   std::vector<ScalarType> b(m*n);
03679   for (unsigned int j=0; j<m; j++) {
03680     for (unsigned int i=0; i<m; i++) {
03681       A[i+j*m] = FadType(ndot, urand.number());
03682       for (unsigned int k=0; k<ndot; k++)
03683         A[i+j*m].fastAccessDx(k) = urand.number();
03684     }
03685   }
03686   for (unsigned int j=0; j<n; j++) {
03687     for (unsigned int i=0; i<m; i++) {
03688       b[i+j*m] = urand.number();
03689       B[i+j*m] = b[i+j*m];
03690     }
03691   }
03692   FadType alpha(ndot, urand.number());
03693   FadType beta(ndot, urand.number());
03694   for (unsigned int k=0; k<ndot; k++) {
03695     alpha.fastAccessDx(k) = urand.number();
03696     beta.fastAccessDx(k) = urand.number();
03697   }
03698 
03699   for (unsigned int j=0; j<n; j++) {
03700     for (unsigned int i=0; i<m; i++) {
03701       ScalarType val = urand.number();
03702       C1[i+j*m] = FadType(ndot, val);
03703       C2[i+j*m] = FadType(ndot, val);
03704       C3[i+j*m] = FadType(ndot, val);
03705       C4[i+j*m] = FadType(ndot, val);
03706       C5[i+j*m] = FadType(ndot, val);
03707       for (unsigned int k=0; k<ndot; k++) {
03708   val = urand.number();
03709   C1[i+j*m].fastAccessDx(k) = val;
03710   C2[i+j*m].fastAccessDx(k) = val;
03711   C3[i+j*m].fastAccessDx(k) = val;
03712   C4[i+j*m].fastAccessDx(k) = val;
03713   C5[i+j*m].fastAccessDx(k) = val;
03714       } 
03715     }
03716   }
03717   
03718   Teuchos::BLAS<int,FadType> teuchos_blas;
03719   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03720         &A[0], m, &B[0], m, beta, &C1[0], m);
03721 
03722   Teuchos::BLAS<int,FadType> sacado_blas(false);
03723   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03724         &A[0], m, &B[0], m, beta, &C2[0], m);
03725 
03726   COMPARE_FAD_VECTORS(C1, C2, m*n);
03727 
03728   unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
03729   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03730   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03731         &A[0], m, &B[0], m, beta, &C3[0], m);
03732 
03733   COMPARE_FAD_VECTORS(C1, C3, m*n);
03734 
03735   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03736         &A[0], m, &b[0], m, beta, &C4[0], m);
03737 
03738   COMPARE_FAD_VECTORS(C1, C4, m*n);
03739 
03740   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03741         &A[0], m, &b[0], m, beta, &C5[0], m);
03742 
03743   COMPARE_FAD_VECTORS(C1, C5, m*n);
03744 
03745   // lower tri
03746   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03747         &A[0], m, &B[0], m, beta, &C1[0], m);
03748   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03749         &A[0], m, &B[0], m, beta, &C2[0], m);
03750   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03751         &A[0], m, &B[0], m, beta, &C3[0], m);
03752   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03753         &A[0], m, &b[0], m, beta, &C4[0], m);
03754   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03755         &A[0], m, &b[0], m, beta, &C5[0], m);
03756 
03757   COMPARE_FAD_VECTORS(C1, C2, m*n);
03758   COMPARE_FAD_VECTORS(C1, C3, m*n);
03759   COMPARE_FAD_VECTORS(C1, C4, m*n);
03760   COMPARE_FAD_VECTORS(C1, C5, m*n);
03761 }
03762 
03763 // Tests with constant A and B
03764 template <class FadType, class ScalarType>
03765 void
03766 FadBLASUnitTests<FadType,ScalarType>::
03767 testSYMM9() {
03768 
03769   // SYMM is apparently not defined in the BLAS for complex types
03770   if (Teuchos::ScalarTraits<ScalarType>::isComplex)
03771     return; 
03772 
03773   VectorType A(m*m,ndot), B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
03774     C4(m*n,ndot), C5(m*n,ndot);
03775   std::vector<ScalarType> a(m*m), b(m*n);
03776   for (unsigned int j=0; j<m; j++) {
03777     for (unsigned int i=0; i<m; i++) {
03778       a[i+j*m] = urand.number();
03779       A[i+j*m] = a[i+j*m];
03780     }
03781   }
03782   for (unsigned int j=0; j<n; j++) {
03783     for (unsigned int i=0; i<m; i++) {
03784       b[i+j*m] = urand.number();
03785       B[i+j*m] = b[i+j*m];
03786     }
03787   }
03788   FadType alpha(ndot, urand.number());
03789   FadType beta(ndot, urand.number());
03790   for (unsigned int k=0; k<ndot; k++) {
03791     alpha.fastAccessDx(k) = urand.number();
03792     beta.fastAccessDx(k) = urand.number();
03793   }
03794 
03795   for (unsigned int j=0; j<n; j++) {
03796     for (unsigned int i=0; i<m; i++) {
03797       ScalarType val = urand.number();
03798       C1[i+j*m] = FadType(ndot, val);
03799       C2[i+j*m] = FadType(ndot, val);
03800       C3[i+j*m] = FadType(ndot, val);
03801       C4[i+j*m] = FadType(ndot, val);
03802       C5[i+j*m] = FadType(ndot, val);
03803       for (unsigned int k=0; k<ndot; k++) {
03804   val = urand.number();
03805   C1[i+j*m].fastAccessDx(k) = val;
03806   C2[i+j*m].fastAccessDx(k) = val;
03807   C3[i+j*m].fastAccessDx(k) = val;
03808   C4[i+j*m].fastAccessDx(k) = val;
03809   C5[i+j*m].fastAccessDx(k) = val;
03810       } 
03811     }
03812   }
03813   
03814   Teuchos::BLAS<int,FadType> teuchos_blas;
03815   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03816         &A[0], m, &B[0], m, beta, &C1[0], m);
03817 
03818   Teuchos::BLAS<int,FadType> sacado_blas(false);
03819   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03820         &A[0], m, &B[0], m, beta, &C2[0], m);
03821 
03822   COMPARE_FAD_VECTORS(C1, C2, m*n);
03823 
03824   unsigned int sz = m*m + m*n*(2+ndot);
03825   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03826   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03827         &A[0], m, &B[0], m, beta, &C3[0], m);
03828 
03829   COMPARE_FAD_VECTORS(C1, C3, m*n);
03830 
03831   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03832         &a[0], m, &b[0], m, beta, &C4[0], m);
03833 
03834   COMPARE_FAD_VECTORS(C1, C4, m*n);
03835 
03836   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha, 
03837         &a[0], m, &b[0], m, beta, &C5[0], m);
03838 
03839   COMPARE_FAD_VECTORS(C1, C5, m*n);
03840 
03841   // lower tri
03842   teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03843         &A[0], m, &B[0], m, beta, &C1[0], m);
03844   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03845         &A[0], m, &B[0], m, beta, &C2[0], m);
03846   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03847         &A[0], m, &B[0], m, beta, &C3[0], m);
03848   sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03849         &a[0], m, &b[0], m, beta, &C4[0], m);
03850   sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha, 
03851         &a[0], m, &b[0], m, beta, &C5[0], m);
03852 
03853   COMPARE_FAD_VECTORS(C1, C2, m*n);
03854   COMPARE_FAD_VECTORS(C1, C3, m*n);
03855   COMPARE_FAD_VECTORS(C1, C4, m*n);
03856   COMPARE_FAD_VECTORS(C1, C5, m*n);
03857 }
03858 
03859 // Tests all arguments, left side
03860 template <class FadType, class ScalarType>
03861 void
03862 FadBLASUnitTests<FadType,ScalarType>::
03863 testTRMM1() {
03864   VectorType A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
03865   for (unsigned int j=0; j<m; j++) {
03866     for (unsigned int i=0; i<m; i++) {
03867       A[i+j*m] = FadType(ndot, urand.number());
03868       for (unsigned int k=0; k<ndot; k++)
03869         A[i+j*m].fastAccessDx(k) = urand.number();
03870     }
03871   }
03872   FadType alpha(ndot, urand.number());
03873   for (unsigned int k=0; k<ndot; k++) {
03874     alpha.fastAccessDx(k) = urand.number();
03875   }
03876 
03877   for (unsigned int j=0; j<n; j++) {
03878     for (unsigned int i=0; i<m; i++) {
03879       ScalarType val = urand.number();
03880       B1[i+j*m] = FadType(ndot, val);
03881       B2[i+j*m] = FadType(ndot, val);
03882       B3[i+j*m] = FadType(ndot, val);
03883       for (unsigned int k=0; k<ndot; k++) {
03884         val = urand.number();
03885         B1[i+j*m].fastAccessDx(k) = val;
03886         B2[i+j*m].fastAccessDx(k) = val;
03887         B3[i+j*m].fastAccessDx(k) = val;
03888       } 
03889     }
03890   }
03891   
03892   Teuchos::BLAS<int,FadType> teuchos_blas;
03893   teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03894         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
03895 
03896   Teuchos::BLAS<int,FadType> sacado_blas(false);
03897   sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03898         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
03899 
03900   COMPARE_FAD_VECTORS(B1, B2, m*n);
03901 
03902   unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
03903   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03904   sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03905         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
03906 
03907   COMPARE_FAD_VECTORS(B1, B3, m*n);
03908 
03909   teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03910         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
03911   sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03912         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
03913   sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03914         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
03915   COMPARE_FAD_VECTORS(B1, B2, m*n);
03916   COMPARE_FAD_VECTORS(B1, B3, m*n);
03917 
03918   teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
03919         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
03920   sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
03921         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
03922   sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
03923         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
03924   COMPARE_FAD_VECTORS(B1, B2, m*n);
03925   COMPARE_FAD_VECTORS(B1, B3, m*n);
03926 
03927   for (unsigned int i=0; i<m; i++) {
03928     A[i*m+i].val() = 1.0;
03929     for (unsigned int k=0; k<ndot; k++)
03930       A[i*m+i].fastAccessDx(k) = 0.0;
03931   }
03932   teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03933         Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B1[0], m);
03934   sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03935         Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B2[0], m);
03936   sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03937         Teuchos::UNIT_DIAG, m, n, alpha, &A[0], m, &B3[0], m);
03938   COMPARE_FAD_VECTORS(B1, B2, m*n);
03939   COMPARE_FAD_VECTORS(B1, B3, m*n);
03940 }
03941 
03942 // Tests all arguments, right side
03943 template <class FadType, class ScalarType>
03944 void
03945 FadBLASUnitTests<FadType,ScalarType>::
03946 testTRMM2() {
03947   VectorType A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
03948   for (unsigned int j=0; j<n; j++) {
03949     for (unsigned int i=0; i<n; i++) {
03950       A[i+j*n] = FadType(ndot, urand.number());
03951       for (unsigned int k=0; k<ndot; k++)
03952         A[i+j*n].fastAccessDx(k) = urand.number();
03953     }
03954   }
03955   FadType alpha(ndot, urand.number());
03956   for (unsigned int k=0; k<ndot; k++) {
03957     alpha.fastAccessDx(k) = urand.number();
03958   }
03959 
03960   for (unsigned int j=0; j<n; j++) {
03961     for (unsigned int i=0; i<m; i++) {
03962       ScalarType val = urand.number();
03963       B1[i+j*m] = FadType(ndot, val);
03964       B2[i+j*m] = FadType(ndot, val);
03965       B3[i+j*m] = FadType(ndot, val);
03966       for (unsigned int k=0; k<ndot; k++) {
03967         val = urand.number();
03968         B1[i+j*m].fastAccessDx(k) = val;
03969         B2[i+j*m].fastAccessDx(k) = val;
03970         B3[i+j*m].fastAccessDx(k) = val;
03971       } 
03972     }
03973   }
03974   
03975   Teuchos::BLAS<int,FadType> teuchos_blas;
03976   teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03977         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
03978 
03979   Teuchos::BLAS<int,FadType> sacado_blas(false);
03980   sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03981         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
03982 
03983   COMPARE_FAD_VECTORS(B1, B2, m*n);
03984 
03985   unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
03986   Teuchos::BLAS<int,FadType> sacado_blas2(false,false,sz);
03987   sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
03988         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
03989 
03990   COMPARE_FAD_VECTORS(B1, B3, m*n);
03991 
03992   teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03993         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
03994   sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03995         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
03996   sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS, 
03997         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
03998   COMPARE_FAD_VECTORS(B1, B2, m*n);
03999   COMPARE_FAD_VECTORS(B1, B3, m*n);
04000 
04001   teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
04002         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B1[0], m);
04003   sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
04004         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B2[0], m);
04005   sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS, 
04006         Teuchos::NON_UNIT_DIAG, m, n, alpha, &A[0], n, &B3[0], m);
04007   COMPARE_FAD_VECTORS(B1, B2, m*n);
04008   COMPARE_FAD_VECTORS(B1, B3, m*n);
04009 
04010   for (unsigned int i=0; i<n; i++) {
04011     A[i*n+i].val() = 1.0;
04012     for (unsigned int k=0; k<ndot; k++)
04013       A[i*n+i].fastAccessDx(k) = 0.0;
04014   }
04015   teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
04016         Teuchos::UNIT_DIAG, m, n, alpha, &