Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultArithmetic.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTARITHMETIC_H
00043 #define KOKKOS_DEFAULTARITHMETIC_H
00044 
00045 #include <Teuchos_BLAS_types.hpp>
00046 #include <Teuchos_Assert.hpp>
00047 #include <Teuchos_TypeNameTraits.hpp>
00048 #include <Teuchos_ArrayView.hpp>
00049 #include <Teuchos_Tuple.hpp>
00050 #include <stdexcept>
00051 
00052 #include "Kokkos_MultiVector.hpp"
00053 #include "Kokkos_MultiVectorKernelOps.hpp"
00054 #include "Kokkos_NodeHelpers.hpp"
00055 #ifdef HAVE_KOKKOS_TBB
00056 #include "Kokkos_TBBNode.hpp"
00057 #endif
00058 #ifdef HAVE_KOKKOS_OPENMP
00059 #include "Kokkos_OpenMPNode.hpp"
00060 #endif
00061 #ifdef HAVE_KOKKOS_THREADPOOL
00062 #include "Kokkos_TPINode.hpp"
00063 #endif
00064 #ifdef HAVE_KOKKOS_THRUST
00065 #include "Kokkos_ThrustGPUNode.hpp"
00066 #include "cublas.h"
00067 #endif
00068 #include "Kokkos_SerialNode.hpp"
00069 #include <Teuchos_BLAS.hpp>
00070 
00071 namespace Kokkos {
00072 
00073   // Class for providing GEMM for a particular Node
00074   template <typename Scalar, typename Node> 
00075   struct NodeGEMM {
00076     public:
00077       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B, Scalar beta, MultiVector<Scalar,Node> &C) {
00078         TEUCHOS_TEST_FOR_EXCEPT(true);
00079       }
00080   };
00081 
00082   template <typename Scalar> 
00083   struct NodeGEMM<Scalar,SerialNode> {
00084     public:
00085       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,SerialNode> &A, const MultiVector<Scalar,SerialNode> &B, Scalar beta, MultiVector<Scalar,SerialNode> &C) {
00086         Teuchos::BLAS<int,Scalar> blas;
00087         const int m = Teuchos::as<int>(C.getNumRows()),
00088                   n = Teuchos::as<int>(C.getNumCols()),
00089                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00090                   lda = Teuchos::as<int>(A.getStride()),
00091                   ldb = Teuchos::as<int>(B.getStride()),
00092                   ldc = Teuchos::as<int>(C.getStride());
00093         blas.GEMM(transA, transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00094       }
00095   };
00096 
00097 #ifdef HAVE_KOKKOS_TBB
00098   template <typename Scalar> 
00099   struct NodeGEMM<Scalar,TBBNode> {
00100     public:
00101       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,TBBNode> &A, const MultiVector<Scalar,TBBNode> &B, Scalar beta, MultiVector<Scalar,TBBNode> &C) {
00102         Teuchos::BLAS<int,Scalar> blas;
00103         const int m = Teuchos::as<int>(C.getNumRows()),
00104                   n = Teuchos::as<int>(C.getNumCols()),
00105                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00106                   lda = Teuchos::as<int>(A.getStride()),
00107                   ldb = Teuchos::as<int>(B.getStride()),
00108                   ldc = Teuchos::as<int>(C.getStride());
00109         blas.GEMM(transA, transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00110       }
00111   };
00112 #endif
00113 
00114 #ifdef HAVE_KOKKOS_THREADPOOL
00115   template <typename Scalar> 
00116   struct NodeGEMM<Scalar,TPINode> {
00117     public:
00118       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,TPINode> &A, const MultiVector<Scalar,TPINode> &B, Scalar beta, MultiVector<Scalar,TPINode> &C) {
00119 #ifndef KOKKOS_DONT_BLOCK_TPI_GEMM
00120         TPI_Block();
00121 #endif
00122         Teuchos::BLAS<int,Scalar> blas;
00123         const int m = Teuchos::as<int>(C.getNumRows()),
00124                   n = Teuchos::as<int>(C.getNumCols()),
00125                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00126                   lda = Teuchos::as<int>(A.getStride()),
00127                   ldb = Teuchos::as<int>(B.getStride()),
00128                   ldc = Teuchos::as<int>(C.getStride());
00129         blas.GEMM(transA, transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00130 #ifndef KOKKOS_DONT_BLOCK_TPI_GEMM
00131         TPI_Unblock();
00132 #endif
00133       }
00134   };
00135 #endif
00136 
00137 #ifdef HAVE_KOKKOS_OPENMP
00138   template <typename Scalar> 
00139   struct NodeGEMM<Scalar,OpenMPNode> {
00140     public:
00141       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,OpenMPNode> &A, const MultiVector<Scalar,OpenMPNode> &B, Scalar beta, MultiVector<Scalar,OpenMPNode> &C) {
00142         Teuchos::BLAS<int,Scalar> blas;
00143         const int m = Teuchos::as<int>(C.getNumRows()),
00144                   n = Teuchos::as<int>(C.getNumCols()),
00145                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00146                   lda = Teuchos::as<int>(A.getStride()),
00147                   ldb = Teuchos::as<int>(B.getStride()),
00148                   ldc = Teuchos::as<int>(C.getStride());
00149         blas.GEMM(transA, transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00150       }
00151   };
00152 #endif
00153 
00154 #ifdef HAVE_KOKKOS_THRUST 
00155   template <typename Scalar>
00156   struct NodeGEMM<Scalar,ThrustGPUNode> {
00157     public:
00158       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,ThrustGPUNode> &A, const MultiVector<Scalar,ThrustGPUNode> &B, Scalar beta, MultiVector<Scalar,ThrustGPUNode> &C) {
00159         TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "NodeGEMM: ThrustGPUNode has no support for GEMM operations over Scalar=" << Teuchos::typeName(alpha) << ".");
00160       }
00161   };
00162 
00163 
00164 #ifdef HAVE_KOKKOS_CUDA_FLOAT
00165   template <>
00166   struct NodeGEMM<float,ThrustGPUNode> {
00167     public:
00168       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, float alpha, const MultiVector<float,ThrustGPUNode> &A, const MultiVector<float,ThrustGPUNode> &B, float beta, MultiVector<float,ThrustGPUNode> &C) {
00169         const int m = Teuchos::as<int>(C.getNumRows()),
00170                   n = Teuchos::as<int>(C.getNumCols()),
00171                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00172                   lda = Teuchos::as<int>(A.getStride()),
00173                   ldb = Teuchos::as<int>(B.getStride()),
00174                   ldc = Teuchos::as<int>(C.getStride());
00175         const char char_transA = (transA == Teuchos::NO_TRANS ? 'N' : 'T'),
00176                    char_transB = (transB == Teuchos::NO_TRANS ? 'N' : 'T');
00177         cublasSgemm(char_transA, char_transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00178 #ifdef HAVE_KOKKOS_DEBUG
00179         cublasStatus info = cublasGetError();
00180         TEUCHOS_TEST_FOR_EXCEPTION( info != CUBLAS_STATUS_SUCCESS, std::runtime_error, "cublasSgemm failed with status " << info << "." );
00181 #endif
00182       }
00183   };
00184 #endif
00185 
00186 #ifdef HAVE_KOKKOS_CUDA_DOUBLE
00187   template <>
00188   struct NodeGEMM<double,ThrustGPUNode> {
00189     public:
00190       static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, double alpha, const MultiVector<double,ThrustGPUNode> &A, const MultiVector<double,ThrustGPUNode> &B, double beta, MultiVector<double,ThrustGPUNode> &C) {
00191         const int m = Teuchos::as<int>(C.getNumRows()),
00192                   n = Teuchos::as<int>(C.getNumCols()),
00193                   k = (transA == Teuchos::NO_TRANS ? A.getNumCols() : A.getNumRows()),
00194                   lda = Teuchos::as<int>(A.getStride()),
00195                   ldb = Teuchos::as<int>(B.getStride()),
00196                   ldc = Teuchos::as<int>(C.getStride());
00197         const char char_transA = (transA == Teuchos::NO_TRANS ? 'N' : 'T'),
00198                    char_transB = (transB == Teuchos::NO_TRANS ? 'N' : 'T');
00199         cublasDgemm(char_transA, char_transB, m, n, k, alpha, A.getValues().getRawPtr(), lda, B.getValues().getRawPtr(), ldb, beta, C.getValuesNonConst().getRawPtr(), ldc);
00200 #ifdef HAVE_KOKKOS_DEBUG
00201         cublasStatus info = cublasGetError();
00202         TEUCHOS_TEST_FOR_EXCEPTION( info != CUBLAS_STATUS_SUCCESS, std::runtime_error, "cublasDgemm failed with status " << info << "." );
00203 #endif
00204       }
00205   };
00206 #endif
00207 
00208 #endif
00209 
00212   template <class MV>
00213   class DefaultArithmetic {
00214     // nothing here
00215   };
00216 
00219   template <class Scalar, class Node>
00220   class DefaultArithmetic<MultiVector<Scalar,Node> > {
00221 
00222     public:
00224       inline static void Init(MultiVector<Scalar,Node> &A, Scalar alpha) {
00225         const size_t nR = A.getNumRows();
00226         const size_t nC = A.getNumCols();
00227         if (nR*nC == 0) return;
00228         const size_t stride = A.getStride();
00229         RCP<Node> node = A.getNode();
00230         ArrayRCP<Scalar> data = A.getValuesNonConst();
00231         // prepare buffers
00232         ReadyBufferHelper<Node> rbh(node);
00233         rbh.begin();
00234         rbh.template addNonConstBuffer<Scalar>(data);
00235         rbh.end();
00236         // prepare op
00237         InitOp<Scalar> wdp;
00238         wdp.alpha = alpha;
00239         if (stride == nR) {
00240           // one kernel invocation for whole multivector
00241           wdp.x = data(0,nR*nC).getRawPtr();
00242           node->template parallel_for<InitOp<Scalar> >(0,nR*nC,wdp);
00243         }
00244         else {
00245           // one kernel invocation for each column
00246           for (size_t j=0; j<nC; ++j) {
00247             wdp.x = data(0,nR).getRawPtr();
00248             node->template parallel_for<InitOp<Scalar> >(0,nR,wdp);
00249             data += stride;
00250           }
00251         }
00252       }
00253 
00255       inline static void Recip(MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B) {
00256         const size_t nR = A.getNumRows();
00257         const size_t nC = A.getNumCols();
00258         const size_t Astride = A.getStride();
00259         const size_t Bstride = B.getStride();
00260         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00261                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Recip(A,B): A and B must have the same dimensions.");
00262         RCP<Node> node = B.getNode();
00263         ArrayRCP<const Scalar> Bdata = B.getValues();
00264         ArrayRCP<Scalar>       Adata = A.getValuesNonConst();
00265         // prepare buffers
00266         ReadyBufferHelper<Node> rbh(node);
00267         rbh.begin();
00268         rbh.template addConstBuffer<Scalar>(Bdata);
00269         rbh.template addNonConstBuffer<Scalar>(Adata);
00270         rbh.end();
00271         RecipOp<Scalar> wdp;
00272         if (A.getStride() == nR && B.getStride() == nR) {
00273           // one kernel invocation for whole multivector
00274           wdp.scale = Bdata(0,nR*nC).getRawPtr();
00275           wdp.x = Adata(0,nR*nC).getRawPtr();
00276           node->template parallel_for<RecipOp<Scalar> >(0,nR*nC,wdp);
00277         }
00278         else {
00279           // one kernel invocation for each column
00280           for (size_t j=0; j<nC; ++j) {
00281             wdp.x = Adata(0,nR).getRawPtr();
00282             wdp.scale = Bdata(0,nR).getRawPtr();
00283             node->template parallel_for<RecipOp<Scalar> >(0,nR,wdp);
00284             Adata += Astride;
00285             Bdata += Bstride;
00286           }
00287         }
00288       }
00289 
00291 
00293       inline static void ElemMult(MultiVector<Scalar,Node> &C, Scalar scalarC,
00294                                Scalar scalarAB,
00295                                const MultiVector<Scalar,Node> &A,
00296                                const MultiVector<Scalar,Node> &B) {
00297         const size_t nR_A = A.getNumRows();
00298         const size_t nC_A = A.getNumCols();
00299         TEUCHOS_TEST_FOR_EXCEPTION(nC_A != 1, std::runtime_error,
00300                            "DefaultArithmetic<"<<Teuchos::typeName(A) << ">::ElemMult(C,sC,sAB,A,B): A must have just 1 column.");
00301         const size_t Cstride = C.getStride();
00302         const size_t Bstride = B.getStride();
00303         const size_t nC_C = C.getNumCols();
00304         const size_t nR_C = C.getNumRows();
00305         TEUCHOS_TEST_FOR_EXCEPTION(nC_C != B.getNumCols() || nR_A != B.getNumRows() || nR_C != B.getNumRows(), std::runtime_error,
00306                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::ElemMult(C,sC,sAB,A,B): A, B and C must have the same num-rows, B and C must have the same num-cols.");
00307         RCP<Node> node = B.getNode();
00308         ArrayRCP<Scalar> Cdata = C.getValuesNonConst();
00309         ArrayRCP<const Scalar> Bdata = B.getValues();
00310         ArrayRCP<const Scalar>       Adata = A.getValues();
00311         // prepare buffers
00312         ReadyBufferHelper<Node> rbh(node);
00313         rbh.begin();
00314         rbh.template addNonConstBuffer<Scalar>(Cdata);
00315         rbh.template addConstBuffer<Scalar>(Bdata);
00316         rbh.template addConstBuffer<Scalar>(Adata);
00317         rbh.end();
00318         MVElemMultOp<Scalar> wdp;
00319         wdp.scalarX = scalarC;
00320         wdp.scalarYZ = scalarAB;
00321         // one kernel invocation for each column
00322         for (size_t j=0; j<nC_C; ++j) {
00323           wdp.x = Cdata(0,nR_C).getRawPtr();
00324           wdp.y = Adata(0,nR_C).getRawPtr();
00325           wdp.z = Bdata(0,nR_C).getRawPtr();
00326           node->template parallel_for<MVElemMultOp<Scalar> >(0,nR_C,wdp);
00327           Cdata += Cstride;
00328           Bdata += Bstride;
00329         }
00330       }
00331 
00333       inline static void Assign(MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B) {
00334         const size_t nR = A.getNumRows();
00335         const size_t nC = A.getNumCols();
00336         const size_t Astride = A.getStride();
00337         const size_t Bstride = B.getStride();
00338         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00339                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Assign(A,B): A and B must have the same dimensions.");
00340         if (nC*nR == 0) return;
00341         RCP<Node> node = A.getNode();
00342         ArrayRCP<const Scalar> Bdata = B.getValues();
00343         ArrayRCP<Scalar>       Adata = A.getValuesNonConst();
00344         // prepare buffers
00345         ReadyBufferHelper<Node> rbh(node);
00346         rbh.begin();
00347         rbh.template addConstBuffer<Scalar>(Bdata);
00348         rbh.template addNonConstBuffer<Scalar>(Adata);
00349         rbh.end();
00350         // prepare op
00351         AssignOp<Scalar> wdp;
00352         if (Astride == nR && Bstride == nR) {
00353           // one kernel invocation for whole multivector assignment
00354           wdp.x = Adata(0,nR*nC).getRawPtr();
00355           wdp.y = Bdata(0,nR*nC).getRawPtr();
00356           node->template parallel_for<AssignOp<Scalar> >(0,nR*nC,wdp);
00357         }
00358         else {
00359           // one kernel invocation for each column
00360           for (size_t j=0; j<nC; ++j) {
00361             wdp.x = Adata(0,nR).getRawPtr();
00362             wdp.y = Bdata(0,nR).getRawPtr();
00363             node->template parallel_for<AssignOp<Scalar> >(0,nR,wdp);
00364             Adata += Astride;
00365             Bdata += Bstride;
00366           }
00367         }
00368       }
00369 
00370       inline static void Dot(const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B, const ArrayView<Scalar> &dots) {
00371         const size_t nR = A.getNumRows();
00372         const size_t nC = A.getNumCols();
00373         const size_t Astride = A.getStride();
00374         const size_t Bstride = B.getStride();
00375         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00376                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Dot(A,B,dots): A and B must have the same dimensions.");
00377         TEUCHOS_TEST_FOR_EXCEPTION(nC > Teuchos::as<size_t>(dots.size()), std::runtime_error, 
00378             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Dot(A,B,dots): dots must have length as large as number of columns of A and B.");
00379         if (nR*nC == 0) {
00380           std::fill( dots.begin(), dots.begin() + nC, Teuchos::ScalarTraits<Scalar>::zero() );
00381           return;
00382         }
00383         RCP<Node> node = A.getNode();
00384         ArrayRCP<const Scalar> Bdata = B.getValues(),
00385                                         Adata = A.getValues();
00386         // prepare buffers
00387         ReadyBufferHelper<Node> rbh(node);
00388         rbh.begin();
00389         rbh.template addConstBuffer<Scalar>(Bdata);
00390         rbh.template addConstBuffer<Scalar>(Adata);
00391         rbh.end();
00392         DotOp2<Scalar> op;
00393         for (size_t j=0; j<nC; ++j) {
00394           op.x = Adata(0,nR).getRawPtr();
00395           op.y = Bdata(0,nR).getRawPtr();
00396           dots[j] = node->parallel_reduce(0,nR,op);
00397           Adata += Astride;
00398           Bdata += Bstride;
00399         }
00400       }
00401 
00402       inline static Scalar Dot(const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B) {
00403         const size_t nR = A.getNumRows();
00404         const size_t nC = A.getNumCols();
00405         TEUCHOS_TEST_FOR_EXCEPTION(nR != B.getNumRows(), std::runtime_error,
00406                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Dot(A,B,dots): A and B must have the same number of rows.");
00407         if (nR*nC == 0) {
00408           return Teuchos::ScalarTraits<Scalar>::zero();
00409         }
00410         RCP<Node> node = A.getNode();
00411         ArrayRCP<const Scalar> Bdata = B.getValues(0),
00412                                         Adata = A.getValues(0);
00413         // prepare buffers
00414         ReadyBufferHelper<Node> rbh(node);
00415         rbh.begin();
00416         rbh.template addConstBuffer<Scalar>(Bdata);
00417         rbh.template addConstBuffer<Scalar>(Adata);
00418         rbh.end();
00419         DotOp2<Scalar> op;
00420         op.x = Adata(0,nR).getRawPtr();
00421         op.y = Bdata(0,nR).getRawPtr();
00422         return node->parallel_reduce(0,nR,op);
00423       }
00424 
00425       inline static void GEMM(MultiVector<Scalar,Node> &C, Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha, const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B, Scalar beta) {
00426         NodeGEMM<Scalar,Node>::GEMM(transA, transB, alpha, A, B, beta, C);
00427       }
00428 
00429       inline static void GESUM(MultiVector<Scalar,Node> &B, Scalar alpha, const MultiVector<Scalar,Node> &A, Scalar beta) {
00430         const size_t nR = A.getNumRows();
00431         const size_t nC = A.getNumCols();
00432         const size_t Astride = A.getStride();
00433         const size_t Bstride = B.getStride();
00434         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00435                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::GESUM(B,alpha,A,beta): A and B must have the same dimensions.");
00436         RCP<Node> node = B.getNode();
00437         ArrayRCP<const Scalar> Adata = A.getValues();
00438         ArrayRCP<Scalar>       Bdata = B.getValuesNonConst();
00439         // prepare buffers
00440         ReadyBufferHelper<Node> rbh(node);
00441         rbh.begin();
00442         rbh.template addConstBuffer<Scalar>(Adata);
00443         rbh.template addNonConstBuffer<Scalar>(Bdata);
00444         rbh.end();
00445         GESUMOp<Scalar> wdp;
00446         wdp.alpha = alpha;
00447         wdp.beta  = beta;
00448         if (Astride == nR && Bstride == nR) {
00449           // one kernel invocation for whole multivector
00450           wdp.y = Bdata(0,nR*nC).getRawPtr();
00451           wdp.x = Adata(0,nR*nC).getRawPtr();
00452           node->template parallel_for<GESUMOp<Scalar> >(0,nR*nC,wdp);
00453         }
00454         else {
00455           // one kernel invocation for each column
00456           for (size_t j=0; j<nC; ++j) {
00457             wdp.y = Bdata(0,nR).getRawPtr();
00458             wdp.x = Adata(0,nR).getRawPtr();
00459             node->template parallel_for<GESUMOp<Scalar> >(0,nR,wdp);
00460             Adata += Astride;
00461             Bdata += Bstride;
00462           }
00463         }
00464       }
00465 
00466       inline static void GESUM(MultiVector<Scalar,Node> &C, Scalar alpha, const MultiVector<Scalar,Node> &A, Scalar beta, const MultiVector<Scalar,Node> &B, Scalar gamma) {
00467         const size_t nR = A.getNumRows();
00468         const size_t nC = A.getNumCols();
00469         const size_t Astride = A.getStride();
00470         const size_t Bstride = B.getStride();
00471         const size_t Cstride = C.getStride();
00472         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00473                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::GESUM(C,alpha,A,beta,B,gamma): A and B must have the same dimensions.");
00474         RCP<Node> node = B.getNode();
00475         ArrayRCP<const Scalar> Adata = A.getValues(),
00476                                         Bdata = B.getValues();
00477         ArrayRCP<Scalar>       Cdata = C.getValuesNonConst();
00478         // prepare buffers
00479         ReadyBufferHelper<Node> rbh(node);
00480         rbh.begin();
00481         rbh.template addConstBuffer<Scalar>(Adata);
00482         rbh.template addConstBuffer<Scalar>(Bdata);
00483         rbh.template addNonConstBuffer<Scalar>(Cdata);
00484         rbh.end();
00485         GESUMOp3<Scalar> wdp;
00486         wdp.alpha = alpha;
00487         wdp.beta  = beta;
00488         wdp.gamma = gamma;
00489         if (Astride == nR && Bstride == nR && Cstride == nR) {
00490           // one kernel invocation for whole multivector
00491           wdp.z = Cdata(0,nR*nC).getRawPtr();
00492           wdp.y = Bdata(0,nR*nC).getRawPtr();
00493           wdp.x = Adata(0,nR*nC).getRawPtr();
00494           node->template parallel_for<GESUMOp3<Scalar> >(0,nR*nC,wdp);
00495         }
00496         else {
00497           // one kernel invocation for each column
00498           for (size_t j=0; j<nC; ++j) {
00499             wdp.z = Cdata(0,nR).getRawPtr();
00500             wdp.y = Bdata(0,nR).getRawPtr();
00501             wdp.x = Adata(0,nR).getRawPtr();
00502             node->template parallel_for<GESUMOp3<Scalar> >(0,nR,wdp);
00503             Adata += Astride;
00504             Bdata += Bstride;
00505             Cdata += Cstride;
00506           }
00507         }
00508       }
00509 
00510       inline static void Norm1(const MultiVector<Scalar,Node> &A, const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) {
00511         const size_t nR = A.getNumRows();
00512         const size_t nC = A.getNumCols();
00513         const size_t Astride = A.getStride();
00514         TEUCHOS_TEST_FOR_EXCEPTION(nC > Teuchos::as<size_t>(norms.size()), std::runtime_error, 
00515             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Norm1(A,norms): norms must have length as large as number of columns of A.");
00516         if (nR*nC == 0) {
00517           std::fill( norms.begin(), norms.begin() + nC, Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero() );
00518           return;
00519         }
00520         RCP<Node> node = A.getNode();
00521         ArrayRCP<const Scalar> Adata = A.getValues();
00522         // prepare buffers
00523         ReadyBufferHelper<Node> rbh(node);
00524         rbh.begin();
00525         rbh.template addConstBuffer<Scalar>(Adata);
00526         rbh.end();
00527         SumAbsOp<Scalar> op;
00528         for (size_t j=0; j<nC; ++j) {
00529           op.x = Adata(0,nR).getRawPtr();
00530           norms[j] = node->parallel_reduce(0,nR,op);
00531           Adata += Astride;
00532         }
00533       }
00534 
00535       inline static typename Teuchos::ScalarTraits<Scalar>::magnitudeType
00536       Norm1(const MultiVector<Scalar,Node> &A) {
00537         const size_t nR = A.getNumRows();
00538         const size_t nC = A.getNumCols();
00539         if (nR*nC == 0) {
00540           return Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero();
00541         }
00542         RCP<Node> node = A.getNode();
00543         ArrayRCP<const Scalar> Adata = A.getValues(0);
00544         // prepare buffers
00545         ReadyBufferHelper<Node> rbh(node);
00546         rbh.begin();
00547         rbh.template addConstBuffer<Scalar>(Adata);
00548         rbh.end();
00549         SumAbsOp<Scalar> op;
00550         op.x = Adata(0,nR).getRawPtr();
00551         return node->parallel_reduce(0,nR,op);
00552       }
00553 
00554       inline static void Sum(const MultiVector<Scalar,Node> &A, const ArrayView<Scalar> &sums) {
00555         const size_t nR = A.getNumRows();
00556         const size_t nC = A.getNumCols();
00557         const size_t Astride = A.getStride();
00558         TEUCHOS_TEST_FOR_EXCEPTION(nC > (size_t)sums.size(), std::runtime_error, 
00559             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Sum(A,sums): sums must have length as large as number of columns of A.");
00560         if (nR*nC == 0) {
00561           std::fill( sums.begin(), sums.begin() + nC, Teuchos::ScalarTraits<Scalar>::zero() );
00562           return;
00563         }
00564         RCP<Node> node = A.getNode();
00565         ArrayRCP<const Scalar> Adata = A.getValues();
00566         // prepare buffers
00567         ReadyBufferHelper<Node> rbh(node);
00568         rbh.begin();
00569         rbh.template addConstBuffer<Scalar>(Adata);
00570         rbh.end();
00571         SumOp<Scalar> op;
00572         for (size_t j=0; j<nC; ++j) {
00573           op.x = Adata(0,nR).getRawPtr();
00574           sums[j] = node->parallel_reduce(0,nR,op);
00575           Adata += Astride;
00576         }
00577       }
00578 
00579       inline static Scalar Sum(const MultiVector<Scalar,Node> &A) {
00580         const size_t nR = A.getNumRows();
00581         const size_t nC = A.getNumCols();
00582         if (nR*nC == 0) {
00583           return Teuchos::ScalarTraits<Scalar>::zero();
00584         }
00585         RCP<Node> node = A.getNode();
00586         ArrayRCP<const Scalar> Adata = A.getValues(0);
00587         // prepare buffers
00588         ReadyBufferHelper<Node> rbh(node);
00589         rbh.begin();
00590         rbh.template addConstBuffer<Scalar>(Adata);
00591         rbh.end();
00592         SumOp<Scalar> op;
00593         op.x = Adata(0,nR).getRawPtr();
00594         return node->parallel_reduce(0,nR,op);
00595       }
00596 
00597       inline static typename Teuchos::ScalarTraits<Scalar>::magnitudeType NormInf(const MultiVector<Scalar,Node> &A) {
00598         const size_t nR = A.getNumRows();
00599         const size_t nC = A.getNumCols();
00600         if (nR*nC == 0) {
00601           return Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero();
00602         }
00603         RCP<Node> node = A.getNode();
00604         ArrayRCP<const Scalar> Adata = A.getValues(0);
00605         // prepare buffers
00606         ReadyBufferHelper<Node> rbh(node);
00607         rbh.begin();
00608         rbh.template addConstBuffer<Scalar>(Adata);
00609         rbh.end();
00610         MaxAbsOp<Scalar> op;
00611         op.x = Adata(0,nR).getRawPtr();
00612         return node->parallel_reduce(0,nR,op);
00613       }
00614 
00615       inline static void NormInf(const MultiVector<Scalar,Node> &A, const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) {
00616         const size_t nR = A.getNumRows();
00617         const size_t nC = A.getNumCols();
00618         const size_t Astride = A.getStride();
00619         TEUCHOS_TEST_FOR_EXCEPTION(nC > Teuchos::as<size_t>(norms.size()), std::runtime_error, 
00620             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::NormInf(A,norms): norms must have length as large as number of columns of A.");
00621         if (nR*nC == 0) {
00622           std::fill( norms.begin(), norms.begin() + nC, Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero() );
00623           return;
00624         }
00625         RCP<Node> node = A.getNode();
00626         ArrayRCP<const Scalar> Adata = A.getValues();
00627         // prepare buffers
00628         ReadyBufferHelper<Node> rbh(node);
00629         rbh.begin();
00630         rbh.template addConstBuffer<Scalar>(Adata);
00631         rbh.end();
00632         MaxAbsOp<Scalar> op;
00633         for (size_t j=0; j<nC; ++j) {
00634           op.x = Adata(0,nR).getRawPtr();
00635           norms[j] = node->parallel_reduce(0,nR,op);
00636           Adata += Astride;
00637         }
00638       }
00639 
00640       inline static void Norm2Squared(const MultiVector<Scalar,Node> &A, const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) {
00641         const size_t nR = A.getNumRows();
00642         const size_t nC = A.getNumCols();
00643         const size_t Astride = A.getStride();
00644         TEUCHOS_TEST_FOR_EXCEPTION(nC > Teuchos::as<size_t>(norms.size()), std::runtime_error, 
00645             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Norm2Squared(A,norms): norms must have length as large as number of columns of A.");
00646         if (nR*nC == 0) {
00647           std::fill( norms.begin(), norms.begin() + nC, Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero() );
00648           return;
00649         }
00650         RCP<Node> node = A.getNode();
00651         ArrayRCP<const Scalar> Adata = A.getValues();
00652         // prepare buffers
00653         ReadyBufferHelper<Node> rbh(node);
00654         rbh.begin();
00655         rbh.template addConstBuffer<Scalar>(Adata);
00656         rbh.end();
00657         DotOp1<Scalar> op;
00658         for (size_t j=0; j<nC; ++j) {
00659           op.x = Adata(0,nR).getRawPtr();
00660           norms[j] = node->parallel_reduce(0,nR,op);
00661           Adata += Astride;
00662         }
00663       }
00664 
00665       inline static typename Teuchos::ScalarTraits<Scalar>::magnitudeType 
00666       Norm2Squared(const MultiVector<Scalar,Node> &A) {
00667         const size_t nR = A.getNumRows();
00668         if (nR == 0) {
00669           return Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero();
00670         }
00671         RCP<Node> node = A.getNode();
00672         ArrayRCP<const Scalar> Adata = A.getValues(0);
00673         // prepare buffers
00674         ReadyBufferHelper<Node> rbh(node);
00675         rbh.begin();
00676         rbh.template addConstBuffer<Scalar>(Adata);
00677         rbh.end();
00678         DotOp1<Scalar> op;
00679         op.x = Adata(0,nR).getRawPtr();
00680         return node->parallel_reduce(0,nR,op);
00681       }
00682 
00683       inline static typename Teuchos::ScalarTraits<Scalar>::magnitudeType
00684       WeightedNorm(const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &weightVector) {
00685         const size_t nR = A.getNumRows();
00686         const size_t nC = A.getNumCols();
00687         if (nR*nC == 0) {
00688           return Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero();
00689         }
00690         RCP<Node> node = A.getNode();
00691         ArrayRCP<const Scalar> Adata = A.getValues(0),
00692                                         Wdata = weightVector.getValues(0);
00693         // prepare buffers
00694         ReadyBufferHelper<Node> rbh(node);
00695         rbh.begin();
00696         rbh.template addConstBuffer<Scalar>(Adata);
00697         rbh.template addConstBuffer<Scalar>(Wdata);
00698         rbh.end();
00699         WeightNormOp<Scalar> op;
00700         op.x = Adata(0,nR).getRawPtr();
00701         op.w = Wdata(0,nR).getRawPtr();
00702         return node->parallel_reduce(0,nR,op);
00703       }
00704 
00705       inline static void WeightedNorm(const MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &weightVector, const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) {
00706         const size_t nR = A.getNumRows();
00707         const size_t nC = A.getNumCols();
00708         const size_t Astride = A.getStride(),
00709                      Wstride = weightVector.getStride();
00710         TEUCHOS_TEST_FOR_EXCEPTION(nC > Teuchos::as<size_t>(norms.size()), std::runtime_error, 
00711             "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Norm1(A,norms): norms must have length as large as number of columns of A.");
00712         if (nR*nC == 0) {
00713           std::fill( norms.begin(), norms.begin() + nC, Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>::zero() );
00714           return;
00715         }
00716         RCP<Node> node = A.getNode();
00717         ArrayRCP<const Scalar> Adata = A.getValues(),
00718                                         Wdata = weightVector.getValues();
00719         const bool OneW = (weightVector.getNumCols() == 1);
00720         // prepare buffers
00721         ReadyBufferHelper<Node> rbh(node);
00722         rbh.begin();
00723         rbh.template addConstBuffer<Scalar>(Adata);
00724         rbh.template addConstBuffer<Scalar>(Wdata);
00725         rbh.end();
00726         WeightNormOp<Scalar> op;
00727         if (OneW) {
00728           op.w = Wdata(0,nR).getRawPtr();
00729           for (size_t j=0; j<nC; ++j) {
00730             op.x = Adata(0,nR).getRawPtr();
00731             norms[j] = node->parallel_reduce(0,nR,op);
00732             Adata += Astride;
00733           }
00734         }
00735         else {
00736           for (size_t j=0; j<nC; ++j) {
00737             op.x = Adata(0,nR).getRawPtr();
00738             op.w = Wdata(0,nR).getRawPtr();
00739             norms[j] = node->parallel_reduce(0,nR,op);
00740             Adata += Astride;
00741             Wdata += Wstride;
00742           }
00743         }
00744       }
00745 
00746       inline static void Random(MultiVector<Scalar,Node> &A) {
00747         // TODO: consider adding rand() functionality to node
00748         // in the meantime, just generate random numbers via Teuchos and then copy to node
00749         typedef Teuchos::ScalarTraits<Scalar> SCT;
00750         const size_t stride = A.getStride();
00751         const size_t nR = A.getNumRows();
00752         const size_t nC = A.getNumCols();
00753         if (nR*nC == 0) return;
00754         RCP<Node> node = A.getNode();
00755         ArrayRCP<Scalar> Adata = A.getValuesNonConst();
00756         // we'll overwrite all data covered by the multivector, but not off-stride data
00757         // therefore, we are write-only only in the case that stride=nR
00758         ReadWriteOption rw = (stride == nR ? WriteOnly : ReadWrite);
00759         ArrayRCP<Scalar> mvdata = node->template viewBufferNonConst<Scalar>(rw,stride*(nC-1)+nR,Adata);
00760         for (size_t j=0; j<nC; ++j) {
00761           for (size_t i=0; i<nR; ++i) {
00762             mvdata[j*stride + i] = SCT::random();
00763           }
00764         }
00765         mvdata = null;
00766       }
00767 
00768       inline static void Abs(MultiVector<Scalar,Node> &A, const MultiVector<Scalar,Node> &B) {
00769         const size_t nR = A.getNumRows();
00770         const size_t nC = A.getNumCols();
00771         const size_t Astride = A.getStride();
00772         const size_t Bstride = B.getStride();
00773         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00774                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Abs(A,B): A and B must have the same dimensions.");
00775         if (nC*nR == 0) return;
00776         RCP<Node> node = A.getNode();
00777         ArrayRCP<const Scalar> Bdata = B.getValues();
00778         ArrayRCP<Scalar>       Adata = A.getValuesNonConst();
00779         // prepare buffers
00780         ReadyBufferHelper<Node> rbh(node);
00781         rbh.begin();
00782         rbh.template addConstBuffer<Scalar>(Bdata);
00783         rbh.template addNonConstBuffer<Scalar>(Adata);
00784         rbh.end();
00785         // prepare op
00786         AbsOp<Scalar> wdp;
00787         if (Astride == nR && Bstride == nR) {
00788           // one kernel invocation for whole multivector assignment
00789           wdp.x = Adata(0,nR*nC).getRawPtr();
00790           wdp.y = Bdata(0,nR*nC).getRawPtr();
00791           node->template parallel_for<AbsOp<Scalar> >(0,nR*nC,wdp);
00792         }
00793         else {
00794           // one kernel invocation for each column
00795           for (size_t j=0; j<nC; ++j) {
00796             wdp.x = Adata(0,nR).getRawPtr();
00797             wdp.y = Bdata(0,nR).getRawPtr();
00798             node->template parallel_for<AbsOp<Scalar> >(0,nR,wdp);
00799             Adata += Astride;
00800             Bdata += Bstride;
00801           }
00802         }
00803       }
00804 
00805       inline static void Scale(MultiVector<Scalar,Node> &B, Scalar alpha, const MultiVector<Scalar,Node> &A) {
00806         const size_t nR = A.getNumRows();
00807         const size_t nC = A.getNumCols();
00808         const size_t Astride = A.getStride();
00809         const size_t Bstride = B.getStride();
00810         TEUCHOS_TEST_FOR_EXCEPTION(nC != B.getNumCols() || nR != B.getNumRows(), std::runtime_error,
00811                            "DefaultArithmetic<" << Teuchos::typeName(A) << ">::Scale(B,alpha,A): A and B must have the same dimensions.");
00812         RCP<Node> node = B.getNode();
00813         ArrayRCP<const Scalar> Adata = A.getValues();
00814         ArrayRCP<Scalar>       Bdata = B.getValuesNonConst();
00815         // prepare buffers
00816         ReadyBufferHelper<Node> rbh(node);
00817         rbh.begin();
00818         rbh.template addConstBuffer<Scalar>(Adata);
00819         rbh.template addNonConstBuffer<Scalar>(Bdata);
00820         rbh.end();
00821         MVScaleOp<Scalar> wdp;
00822         wdp.alpha = alpha;
00823         if (Astride == nR && Bstride == nR) {
00824           // one kernel invocation for whole multivector
00825           wdp.x = Bdata(0,nR*nC).getRawPtr();
00826           wdp.y = Adata(0,nR*nC).getRawPtr();
00827           node->template parallel_for<MVScaleOp<Scalar> >(0,nR*nC,wdp);
00828         }
00829         else {
00830           // one kernel invocation for each column
00831           for (size_t j=0; j<nC; ++j) {
00832             wdp.x = Bdata(0,nR).getRawPtr();
00833             wdp.y = Adata(0,nR).getRawPtr();
00834             node->template parallel_for<MVScaleOp<Scalar> >(0,nR,wdp);
00835             Adata += Astride;
00836             Bdata += Bstride;
00837           }
00838         }
00839       }
00840 
00841       inline static void Scale(MultiVector<Scalar,Node> &A, Scalar alpha) {
00842         const size_t nR = A.getNumRows();
00843         const size_t nC = A.getNumCols();
00844         const size_t stride = A.getStride();
00845         RCP<Node> node = A.getNode();
00846         ArrayRCP<Scalar> data = A.getValuesNonConst();
00847         // prepare buffers
00848         ReadyBufferHelper<Node> rbh(node);
00849         rbh.begin();
00850         rbh.template addNonConstBuffer<Scalar>(data);
00851         rbh.end();
00852         // prepare op
00853         SingleScaleOp<Scalar> wdp;
00854         wdp.alpha = alpha;
00855         if (stride == nR) {
00856           // one kernel invocation for whole multivector
00857           wdp.x = data(0,nR*nC).getRawPtr();
00858           node->template parallel_for<SingleScaleOp<Scalar> >(0,nR*nC,wdp);
00859         }
00860         else {
00861           // one kernel invocation for each column
00862           for (size_t j=0; j<nC; ++j) {
00863             wdp.x = data(0,nR).getRawPtr();
00864             node->template parallel_for<SingleScaleOp<Scalar> >(0,nR,wdp);
00865             data += stride;
00866           }
00867         }
00868       }
00869 
00870       inline static void initializeValues(MultiVector<Scalar,Node> &A,
00871                                    size_t numRows, size_t numCols, 
00872                                    const ArrayRCP<Scalar> &values,
00873                                    size_t stride) {
00874         A.initializeValues(numRows,numCols,values,stride);
00875       }
00876 
00877       inline static ArrayRCP<const Scalar> getValues(const MultiVector<Scalar,Node> &A) {
00878         return A.getValues();
00879       }
00880 
00881       inline static ArrayRCP<const Scalar> getValues(const MultiVector<Scalar,Node> &A, size_t j) {
00882         return A.getValues(j);
00883       }
00884 
00885       inline static ArrayRCP<Scalar> getValuesNonConst(MultiVector<Scalar,Node> &A) {
00886         return A.getValuesNonConst();
00887       }
00888 
00889       inline static ArrayRCP<Scalar> getValuesNonConst(MultiVector<Scalar,Node> &A, size_t j) {
00890         return A.getValuesNonConst(j);
00891       }
00892 
00893       inline static size_t getNumRows(const MultiVector<Scalar,Node> &A) {
00894         return A.getNumRows();
00895       }
00896 
00897       inline static size_t getNumCols(const MultiVector<Scalar,Node> &A) {
00898         return A.getNumCols();
00899       }
00900 
00901       inline static size_t getStride(const MultiVector<Scalar,Node> &A) {
00902         return A.getStride();
00903       }
00904 
00905       inline static RCP<Node> getNode(const MultiVector<Scalar,Node> &A) {
00906         return A.getNode();
00907       }
00908 
00909   };
00910 
00911 }
00912 
00913 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends