Kokkos_DefaultSparseOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_DEFAULTSPARSEOPS_HPP
00030 #define KOKKOS_DEFAULTSPARSEOPS_HPP
00031 
00032 #include <Teuchos_ArrayRCP.hpp>
00033 #include <Teuchos_DataAccess.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_TypeNameTraits.hpp>
00036 #include <Teuchos_CompileTimeAssert.hpp>
00037 #include <stdexcept>
00038 
00039 #include "Kokkos_ConfigDefs.hpp"
00040 #include "Kokkos_CrsMatrix.hpp" 
00041 #include "Kokkos_CrsGraph.hpp" 
00042 #include "Kokkos_MultiVector.hpp"
00043 #include "Kokkos_NodeHelpers.hpp"
00044 #include "Kokkos_DefaultArithmetic.hpp"
00045 #include "Kokkos_DefaultSparseScaleKernelOps.hpp"
00046 #include "Kokkos_DefaultSparseSolveKernelOps.hpp"
00047 #include "Kokkos_DefaultSparseMultiplyKernelOps.hpp"
00048 
00049 namespace Kokkos {
00050 
00054   template <class Scalar, class Ordinal, class Node>
00055   class DefaultHostSparseOps {
00056   public:
00058 
00059 
00061     typedef Scalar  ScalarType;
00063     typedef Ordinal OrdinalType;
00065     typedef Node    NodeType;
00066 
00071     template <class S2>
00072     struct rebind {
00073       typedef DefaultHostSparseOps<S2,Ordinal,Node> other;
00074     };
00075 
00077 
00078 
00079 
00081     DefaultHostSparseOps(const RCP<Node> &node);
00082 
00084     ~DefaultHostSparseOps();
00085 
00087 
00088 
00089     
00091     RCP<Node> getNode() const;
00092 
00094 
00095 
00096 
00098     void initializeStructure(const CrsGraphHostCompute<Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &graph);
00099 
00101     void initializeValues(const CrsMatrixHostCompute<Scalar,Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &matrix);
00102 
00104     void clear();
00105 
00107 
00108 
00109 
00111     template <class DomainScalar, class RangeScalar>
00112     void multiply(Teuchos::ETransp trans, RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, MultiVector<RangeScalar,Node> &Y) const;
00113 
00115     template <class DomainScalar, class RangeScalar>
00116     void multiply(Teuchos::ETransp trans, 
00117                   RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const;
00118 
00120     template <class DomainScalar, class RangeScalar>
00121     void solve(Teuchos::ETransp trans, Teuchos::EUplo uplo, Teuchos::EDiag diag, 
00122                const MultiVector<DomainScalar,Node> &Y, MultiVector<RangeScalar,Node> &X) const;
00123 
00125     template <class VectorScalar>
00126     void leftScale(const MultiVector<VectorScalar,Node> &X);
00127 
00129 
00130   protected:
00132     DefaultHostSparseOps(const DefaultHostSparseOps& source);
00133 
00134     RCP<Node> node_;
00135 
00136     // we do this one of two ways: 
00137     // 1D/packed: arrays of offsets, array of ordinals, array of values.
00138     ArrayRCP<const Ordinal> inds1D_;
00139     ArrayRCP<const size_t>  begs1D_, ends1D_;
00140     ArrayRCP<Scalar>  vals1D_;
00141     // 2D: array of pointers
00142     ArrayRCP<const ArrayRCP<Ordinal> > inds2D_;
00143     ArrayRCP<const ArrayRCP<Scalar> >  vals2D_;
00144     ArrayRCP<const size_t>          numEntries_;
00145     ArrayRCP<const Ordinal *> indPtrs_;
00146     ArrayRCP<Scalar  *> valPtrs_;
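    //
    // Illustration (editor's sketch, values are hypothetical): for the 3x3 matrix
    //   [ 2 0 1 ]
    //   [ 0 3 0 ]
    //   [ 4 0 5 ]
    // the 1D/packed representation holds
    //   begs1D_ = {0, 2, 3}        first entry of each row
    //   ends1D_ = {2, 3, 5}        one past the last entry of each row
    //   inds1D_ = {0, 2, 1, 0, 2}  column indices, rows concatenated
    //   vals1D_ = {2, 1, 3, 4, 5}
    // while the 2D representation holds, per row r,
    //   inds2D_[r], vals2D_[r]     that row's indices and values
    //   numEntries_[r]             that row's length,
    // with indPtrs_[r] and valPtrs_[r] caching the raw pointers for the kernels.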
00147 
00148     size_t numRows_;
00149     bool indsInit_, valsInit_, isEmpty_;
00150   };
00151 
00152   template<class Scalar, class Ordinal, class Node>
00153   DefaultHostSparseOps<Scalar,Ordinal,Node>::DefaultHostSparseOps(const RCP<Node> &node)
00154   : node_(node) 
00155   {
00156     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00157     clear();
00158   }
00159 
00160   template<class Scalar, class Ordinal, class Node>
00161   DefaultHostSparseOps<Scalar,Ordinal,Node>::~DefaultHostSparseOps() {
00162   }
00163 
00164   template <class Scalar, class Ordinal, class Node>
00165   RCP<Node> DefaultHostSparseOps<Scalar,Ordinal,Node>::getNode() const {
00166     return node_; 
00167   }
00168 
00169   template <class Scalar, class Ordinal, class Node>
00170   void DefaultHostSparseOps<Scalar,Ordinal,Node>::clear() {
00171     begs1D_     = null;
00172     ends1D_     = null;
00173     inds1D_     = null;
00174     vals1D_     = null;
00175     inds2D_     = null;
00176     vals2D_     = null;
00177     numEntries_ = null;
00178     indPtrs_    = null;
00179     valPtrs_    = null;
00180     numRows_  = 0;
00181     indsInit_ = false;
00182     valsInit_ = false;
00183     isEmpty_  = false;
00184   }
00185 
00186   template <class Scalar, class Ordinal, class Node>
00187   void DefaultHostSparseOps<Scalar,Ordinal,Node>::initializeStructure(const CrsGraphHostCompute<Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &graph) {
00188     using Teuchos::arcp;
00189     TEST_FOR_EXCEPTION(indsInit_ == true || valsInit_ == true, std::runtime_error, 
00190         Teuchos::typeName(*this) << "::initializeStructure(): structure already initialized.");
00191     numRows_ = graph.getNumRows();
00192     if (graph.isEmpty() || numRows_ == 0) {
00193       isEmpty_ = true;
00194     }
00195     else if (graph.is1DStructure()) {
00196       isEmpty_ = false;
00197       ArrayRCP<Ordinal> inds;
00198       ArrayRCP<size_t> begs, ends;
00199       const_cast<CrsGraphHostCompute<Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &>(graph).get1DStructure( inds, begs, ends );
00200       inds1D_ = inds;
00201       begs1D_ = begs;
00202       ends1D_ = ends;
00203     }
00204     else {
00205       isEmpty_  = false;
00206       {
00207         ArrayRCP<ArrayRCP<Ordinal> > inds;
00208         ArrayRCP<size_t>  sizes;
00209         const_cast<CrsGraphHostCompute<Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &>(graph).get2DStructure(inds,sizes);
00210         inds2D_     = inds;
00211         numEntries_ = sizes;
00212       }
00213       indPtrs_    = arcp<const Ordinal *>(numRows_);
00214       for (size_t r=0; r < numRows_; ++r) {
00215         indPtrs_[r] = inds2D_[r].getRawPtr();
00216       }
00217     }
00218     indsInit_ = true;
00219   }
00220 
00221 
00222   template <class Scalar, class Ordinal, class Node>
00223   void DefaultHostSparseOps<Scalar,Ordinal,Node>::initializeValues(const CrsMatrixHostCompute<Scalar,Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &matrix) {
00224     using Teuchos::arcp;
00225     TEST_FOR_EXCEPTION(indsInit_ == false, std::runtime_error, 
00226         Teuchos::typeName(*this) << "::initializeValues(): must initialize values after graph.");
00227     TEST_FOR_EXCEPTION(numRows_ != matrix.getNumRows() || isEmpty_ != matrix.isEmpty() || 
00228                        (inds2D_ != null && matrix.is1DStructure()) || (inds1D_ != null && matrix.is2DStructure()),
00229                        std::runtime_error, Teuchos::typeName(*this) << "::initializeValues(): matrix not compatible with previously supplied graph.");
00230     if (!isEmpty_) {
00231       if (matrix.is1DStructure()) {
00232         ArrayRCP<Scalar> vals;
00233         const_cast<CrsMatrixHostCompute<Scalar,Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &>(matrix).get1DValues( vals );
00234         vals1D_ = vals;
00235       }
00236       else {
00237         {
00238           ArrayRCP<ArrayRCP<Scalar> > vals;
00239           const_cast<CrsMatrixHostCompute<Scalar,Ordinal,Node,DefaultHostSparseOps<void,Ordinal,Node> > &>(matrix).get2DValues(vals);
00240           vals2D_ = vals;
00241         }
00242         valPtrs_ = arcp<Scalar *>(numRows_);
00243         for (size_t r=0; r < numRows_; ++r) {
00244           valPtrs_[r] = vals2D_[r].getRawPtr();
00245         }
00246       }
00247     }
00248     valsInit_ = true;
00249   }
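  // Usage sketch (editor's addition, not part of the original header): the
  // expected call sequence for a host sparse-ops object, assuming a host node
  // and an already-filled graph/matrix pair. The names "node", "graph",
  // "matrix", "X", "Y", and "alpha" below are placeholders.
  //
  //   DefaultHostSparseOps<double,int,NodeType> ops(node);
  //   ops.initializeStructure(graph);  // graph first ...
  //   ops.initializeValues(matrix);    // ... then values matching that graph
  //   ops.multiply(Teuchos::NO_TRANS, alpha, X, Y);  // Y = alpha * A * X
  //
  // Reversing the first two calls, or supplying a matrix whose shape or
  // storage scheme differs from the graph, throws std::runtime_error per the
  // checks above.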
00250 
00251 
00252   template <class Scalar, class Ordinal, class Node>
00253   template <class DomainScalar, class RangeScalar>
00254   void DefaultHostSparseOps<Scalar,Ordinal,Node>::solve(
00255                       Teuchos::ETransp trans, Teuchos::EUplo uplo, Teuchos::EDiag diag, 
00256                       const MultiVector<DomainScalar,Node> &Y,
00257                             MultiVector<RangeScalar,Node> &X) const 
00258   {
00259     typedef DefaultSparseSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1D;
00260     typedef DefaultSparseSolveOp2<Scalar,Ordinal,DomainScalar,RangeScalar>  Op2D;
00261     typedef DefaultSparseTransposeSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  TOp1D;
00262     typedef DefaultSparseTransposeSolveOp2<Scalar,Ordinal,DomainScalar,RangeScalar>  TOp2D;
00263     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00264         Teuchos::typeName(*this) << "::solve(): this solve was not fully initialized.");
00265     TEST_FOR_EXCEPTION(X.getNumCols() != Y.getNumCols(), std::runtime_error,
00266         Teuchos::typeName(*this) << "::solve(): Left hand side and right hand side multivectors have differing numbers of vectors.");
00267     TEST_FOR_EXCEPTION(X.getNumRows() < numRows_, std::runtime_error,
00268         Teuchos::typeName(*this) << "::solve(): Left-hand-side multivector does not have enough rows. Likely cause is that the column map was not provided to the Tpetra::CrsMatrix in the case of an implicit unit diagonal.");
00269 
00270     ReadyBufferHelper<Node> rbh(node_);
00271     if (numRows_ == 0) {
00272       // null op
00273     }
00274     else if (isEmpty_) {
00275       TEST_FOR_EXCEPTION(diag != Teuchos::UNIT_DIAG, std::runtime_error,
00276           Teuchos::typeName(*this) << "::solve(): solve of empty matrix only valid for an implicit unit diagonal.");
00277       // solve I * X = Y for X = Y
00278       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00279     }
00280     else if (begs1D_ != null) {
00281       if (trans == Teuchos::NO_TRANS) {
00282         Op1D wdp;
00283         rbh.begin();
00284         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00285         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00286         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00287         wdp.vals    = rbh.template addConstBuffer< Scalar>(vals1D_);
00288         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00289         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00290         rbh.end();
00291         wdp.numRows = numRows_;
00292         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00293         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00294         wdp.xstride = X.getStride();
00295         wdp.ystride = Y.getStride();
00296         const size_t numRHS = X.getNumCols();
00297         node_->template parallel_for<Op1D>(0,numRHS,wdp);
00298       }
00299       else {
00300         TOp1D wdp;
00301         rbh.begin();
00302         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00303         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00304         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00305         wdp.vals    = rbh.template addConstBuffer< Scalar>(vals1D_);
00306         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00307         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00308         rbh.end();
00309         wdp.numRows = numRows_;
00310         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00311         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00312         wdp.xstride = X.getStride();
00313         wdp.ystride = Y.getStride();
00314         const size_t numRHS = X.getNumCols();
00315         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00316       }
00317     }
00318     else {
00319       if (trans == Teuchos::NO_TRANS) {
00320         Op2D wdp;
00321         rbh.begin();
00322         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00323         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00324         wdp.vals_beg   = rbh.template addConstBuffer< Scalar *>(valPtrs_);
00325         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00326         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00327         rbh.end();
00328         wdp.numRows = numRows_;
00329         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00330         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00331         wdp.xstride = X.getStride();
00332         wdp.ystride = Y.getStride();
00333         const size_t numRHS = X.getNumCols();
00334         node_->template parallel_for<Op2D>(0,numRHS,wdp);
00335       }
00336       else {
00337         TOp2D wdp;
00338         rbh.begin();
00339         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00340         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00341         wdp.vals_beg   = rbh.template addConstBuffer< Scalar *>(valPtrs_);
00342         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00343         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00344         rbh.end();
00345         wdp.numRows = numRows_;
00346         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00347         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00348         wdp.xstride = X.getStride();
00349         wdp.ystride = Y.getStride();
00350         const size_t numRHS = X.getNumCols();
00351         node_->template parallel_for<TOp2D>(0,numRHS,wdp);
00352       }
00353     }
00354     return;
00355   }
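  // Usage sketch (editor's addition): solve() treats the stored matrix as a
  // triangular factor and solves op(A) * X = Y, with Y the right-hand side
  // and X receiving the solution. A hypothetical lower-triangular solve with
  // an implicit unit diagonal would be
  //
  //   ops.solve(Teuchos::NO_TRANS, Teuchos::LOWER_TRI, Teuchos::UNIT_DIAG, Y, X);
  //
  // X and Y must have the same number of columns, and X must have at least
  // numRows_ rows, as enforced by the exceptions above.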
00356 
00357 
00358   template <class Scalar, class Ordinal, class Node>
00359   template <class DomainScalar, class RangeScalar>
00360   void DefaultHostSparseOps<Scalar,Ordinal,Node>::multiply(
00361                                 Teuchos::ETransp trans, 
00362                                 RangeScalar alpha,
00363                                 const MultiVector<DomainScalar,Node> &X, 
00364                                 MultiVector<RangeScalar,Node> &Y) const 
00365   {
00366     // the 1 parameter to the template means that beta is ignored and the output multivector enjoys overwrite semantics
00367     typedef DefaultSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 1>  Op1D;
00368     typedef DefaultSparseMultiplyOp2<Scalar,Ordinal,DomainScalar,RangeScalar, 1>  Op2D;
00369     typedef DefaultSparseTransposeMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 1> TOp1D;
00370     typedef DefaultSparseTransposeMultiplyOp2<Scalar,Ordinal,DomainScalar,RangeScalar, 1> TOp2D;
00371     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00372         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00373     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00374     ReadyBufferHelper<Node> rbh(node_);
00375     if (isEmpty_ == true) {
00376       // Y <= 0 * X
00377       //   <= 0
00378       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00379     }
00380     else if (begs1D_ != null) {
00381       if (trans == Teuchos::NO_TRANS) {
00382         Op1D wdp;
00383         rbh.begin();
00384         wdp.alpha   = alpha;
00385         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00386         wdp.numRows = numRows_;
00387         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00388         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00389         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00390         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals1D_);
00391         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00392         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00393         wdp.xstride = X.getStride();
00394         wdp.ystride = Y.getStride();
00395         rbh.end();
00396         const size_t numRHS = X.getNumCols();
00397         node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00398       }
00399       else {
00400         TOp1D wdp;
00401         rbh.begin();
00402         wdp.alpha   = alpha;
00403         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00404         wdp.numRows = numRows_;
00405         wdp.numCols = Y.getNumRows();
00406         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00407         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00408         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00409         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals1D_);
00410         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00411         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00412         wdp.xstride = X.getStride();
00413         wdp.ystride = Y.getStride();
00414         rbh.end();
00415         const size_t numRHS = X.getNumCols();
00416         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00417       }
00418     }
00419     else {
00420       if (trans == Teuchos::NO_TRANS) {
00421         Op2D wdp;
00422         rbh.begin();
00423         wdp.alpha   = alpha;
00424         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00425         wdp.numRows = numRows_;
00426         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00427         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00428         wdp.vals_beg   = rbh.template addConstBuffer<Scalar *>(valPtrs_);
00429         wdp.x          = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00430         wdp.y          = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00431         rbh.end();
00432         wdp.xstride = X.getStride();
00433         wdp.ystride = Y.getStride();
00434         const size_t numRHS = X.getNumCols();
00435         node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00436       }
00437       else {
00438         TOp2D wdp;
00439         rbh.begin();
00440         wdp.alpha   = alpha;
00441         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00442         wdp.numRows = numRows_;
00443         wdp.numCols = Y.getNumRows();
00444         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00445         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00446         wdp.vals_beg   = rbh.template addConstBuffer<Scalar *>(valPtrs_);
00447         wdp.x          = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00448         wdp.y          = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00449         wdp.xstride = X.getStride();
00450         wdp.ystride = Y.getStride();
00451         rbh.end();
00452         const size_t numRHS = X.getNumCols();
00453         node_->template parallel_for<TOp2D>(0,numRHS,wdp);
00454       }
00455     }
00456     return;
00457   }
00458 
00459 
00460   template <class Scalar, class Ordinal, class Node>
00461   template <class DomainScalar, class RangeScalar>
00462   void DefaultHostSparseOps<Scalar,Ordinal,Node>::multiply(
00463                                 Teuchos::ETransp trans, 
00464                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, 
00465                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const 
00466   {
00467     // the 0 parameter means that beta is considered, and the output multivector enjoys accumulate semantics
00468     typedef DefaultSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 0>  Op1D;
00469     typedef DefaultSparseMultiplyOp2<Scalar,Ordinal,DomainScalar,RangeScalar, 0>  Op2D;
00470     typedef DefaultSparseTransposeMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 0> TOp1D;
00471     typedef DefaultSparseTransposeMultiplyOp2<Scalar,Ordinal,DomainScalar,RangeScalar, 0> TOp2D;
00472     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00473         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00474     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00475     ReadyBufferHelper<Node> rbh(node_);
00476     if (isEmpty_ == true) {
00477       // Y <= alpha * 0 * X + beta * Y 
00478       //   <= beta * Y
00479       // NOTE: this neglects NaNs in X, which don't satisfy 0*NaN == 0.
00480       //       However, X and Y may have different sizes, and we would need the matrix entries to know how NaNs in X should propagate into Y;
00481       //       the best we can do is therefore scale Y by beta. Assigning Y = 0 outright would destroy NaNs already present in Y, violating the semantics of the call.
00482       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
00483     }
00484     else if (begs1D_ != null) {
00485       if (trans == Teuchos::NO_TRANS) {
00486         Op1D wdp;
00487         rbh.begin();
00488         wdp.alpha   = alpha;
00489         wdp.beta    = beta;
00490         wdp.numRows = numRows_;
00491         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00492         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00493         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00494         wdp.vals    = rbh.template addConstBuffer< Scalar>(vals1D_);
00495         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00496         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00497         wdp.xstride = X.getStride();
00498         wdp.ystride = Y.getStride();
00499         rbh.end();
00500         const size_t numRHS = X.getNumCols();
00501         node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00502       }
00503       else {
00504         TOp1D wdp;
00505         rbh.begin();
00506         wdp.alpha   = alpha;
00507         wdp.beta    = beta;
00508         wdp.numRows = numRows_;
00509         wdp.numCols = Y.getNumRows();
00510         wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00511         wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00512         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00513         wdp.vals    = rbh.template addConstBuffer< Scalar>(vals1D_);
00514         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00515         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00516         wdp.xstride = X.getStride();
00517         wdp.ystride = Y.getStride();
00518         rbh.end();
00519         const size_t numRHS = X.getNumCols();
00520         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00521       }
00522     }
00523     else {
00524       if (trans == Teuchos::NO_TRANS) {
00525         Op2D wdp;
00526         rbh.begin();
00527         wdp.numRows = numRows_;
00528         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00529         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00530         wdp.vals_beg   = rbh.template addConstBuffer<Scalar *>(valPtrs_);
00531         wdp.x          = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00532         wdp.y          = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00533         rbh.end();
00534         wdp.alpha   = alpha;
00535         wdp.beta    = beta;
00536         wdp.xstride = X.getStride();
00537         wdp.ystride = Y.getStride();
00538         const size_t numRHS = X.getNumCols();
00539         node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00540       }
00541       else {
00542         TOp2D wdp;
00543         rbh.begin();
00544         wdp.alpha   = alpha;
00545         wdp.beta    = beta;
00546         wdp.numRows = numRows_;
00547         wdp.numCols = Y.getNumRows();
00548         wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00549         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00550         wdp.vals_beg   = rbh.template addConstBuffer<Scalar *>(valPtrs_);
00551         wdp.x          = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00552         wdp.y          = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00553         wdp.xstride = X.getStride();
00554         wdp.ystride = Y.getStride();
00555         rbh.end();
00556         const size_t numRHS = X.getNumCols();
00557         node_->template parallel_for<TOp2D>(0,numRHS,wdp);
00558       }
00559     }
00560     return;
00561   }
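  // Editor's note: both multiply() overloads dispatch to the same kernel
  // templates; they differ only in the final flag described in the comments
  // above. With the flag equal to 1 the output is overwritten,
  //   Y = alpha * op(A) * X,
  // and beta is ignored; with the flag equal to 0 the existing contents of Y
  // are accumulated,
  //   Y = alpha * op(A) * X + beta * Y.
  // Hypothetical calls:
  //   ops.multiply(Teuchos::NO_TRANS, alpha, X, Y);        // overwrite Y
  //   ops.multiply(Teuchos::NO_TRANS, alpha, X, beta, Y);  // accumulate into Y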
00562 
00563 
00564 
00565 
00566   template <class Scalar, class Ordinal, class Node>
00567   template <class VectorScalar>
00568   void DefaultHostSparseOps<Scalar,Ordinal,Node>::leftScale(const MultiVector<VectorScalar,Node> &X) 
00569   {
00570     typedef DefaultSparseScaleOp1<Scalar,Ordinal,VectorScalar>  Op1D;
00571     typedef DefaultSparseScaleOp2<Scalar,Ordinal,VectorScalar>  Op2D;
00572     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00573         Teuchos::typeName(*this) << "::leftScale(): operation not fully initialized.");
00574     TEST_FOR_EXCEPT(X.getNumCols() != 1);
00575     ReadyBufferHelper<Node> rbh(node_);
00576     if (begs1D_ != null) {
00577       Op1D wdp;
00578       rbh.begin();
00579       wdp.numRows = numRows_;
00580       wdp.begs    = rbh.template addConstBuffer<size_t>(begs1D_);
00581       wdp.ends    = rbh.template addConstBuffer<size_t>(ends1D_);
00582       wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds1D_);
00583       wdp.vals    = rbh.template addNonConstBuffer<Scalar>(vals1D_);
00584       wdp.x       = rbh.template addConstBuffer<VectorScalar>(X.getValues());
00585       rbh.end();
00586       node_->template parallel_for<Op1D>(0,numRows_,wdp); 
00587     }
00588     else {
00589       Op2D wdp;
00590       rbh.begin();
00591       wdp.numRows = numRows_;
00592       wdp.numEntries = rbh.template addConstBuffer<size_t>(numEntries_);
00593       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(indPtrs_);
00594       wdp.vals_beg   = rbh.template addNonConstBuffer<Scalar *>(valPtrs_);
00595       wdp.x          = rbh.template addConstBuffer<VectorScalar>(X.getValues());
00596       rbh.end();
00597       node_->template parallel_for<Op2D>(0,numRows_,wdp);
00598     }
00599     return;
00600   }
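  // Editor's note (sketch of the intended semantics): leftScale() rescales the
  // stored values in place by the single column of X, row by row, i.e. it
  // replaces A with diag(x) * A. For a hypothetical row r holding values
  // {2, 1} with x[r] = 3, the stored values for that row become {6, 3}; the
  // column indices and the sparsity structure are unchanged.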
00601 
00602 
00606   template <class Scalar, class Ordinal, class Node>
00607   class DefaultDeviceSparseOps {
00608   public:
00609     typedef Scalar  ScalarType;
00610     typedef Ordinal OrdinalType;
00611     typedef Node    NodeType;
00612 
00613     template <class S2>
00614     struct rebind {
00615       typedef DefaultDeviceSparseOps<S2,Ordinal,Node> other;
00616     };
00617 
00619 
00621 
00623     DefaultDeviceSparseOps(const RCP<Node> &node);
00624 
00626     ~DefaultDeviceSparseOps();
00627 
00629 
00631 
00632     
00634     RCP<Node> getNode() const;
00635 
00637 
00639 
00641 
00643     void initializeStructure(const CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &graph);
00644 
00646     void initializeValues(const CrsMatrixDeviceCompute<Scalar,Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &matrix);
00647 
00649     void clear();
00650 
00652 
00654 
00656 
00658     template <class DomainScalar, class RangeScalar>
00659     void multiply(Teuchos::ETransp trans, RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, MultiVector<RangeScalar,Node> &Y) const;
00660 
00662     template <class DomainScalar, class RangeScalar>
00663     void multiply(Teuchos::ETransp trans, 
00664                   RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const;
00665 
00667     template <class DomainScalar, class RangeScalar>
00668     void solve(Teuchos::ETransp trans, Teuchos::EUplo uplo, Teuchos::EDiag diag, 
00669                const MultiVector<DomainScalar,Node> &Y, MultiVector<RangeScalar,Node> &X) const;
00670 
00671 
00673     template <class VectorScalar>
00674     void leftScale(const MultiVector<VectorScalar,Node> &X);
00675 
00677 
00678   protected:
00680     DefaultDeviceSparseOps(const DefaultDeviceSparseOps& source);
00681 
00682     RCP<Node> node_;
00683 
00684     // we do this one of two ways: 
00685     // 1D: array of offsets, pointer for ordinals, pointer for values.
00686     ArrayRCP<const size_t>  pbuf_offsets1D_;
00687     ArrayRCP<const Ordinal> pbuf_inds1D_;
00688     ArrayRCP<Scalar>  pbuf_vals1D_;
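    //
    // Illustration (editor's sketch): the device variant keeps only the packed
    // layout. pbuf_offsets1D_ holds numRows_+1 offsets, so row r occupies the
    // half-open range [pbuf_offsets1D_[r], pbuf_offsets1D_[r+1]) of
    // pbuf_inds1D_ and pbuf_vals1D_. This is why the kernels below set
    //   wdp.ends = wdp.begs + 1;
    // rather than carrying a separate "ends" array as the host class does.
    // For the hypothetical 3x3 matrix sketched for the host class,
    // pbuf_offsets1D_ would be {0, 2, 3, 5}.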
00689 
00690     size_t numRows_;
00691     bool indsInit_, valsInit_, isEmpty_;
00692   };
00693 
00694   template<class Scalar, class Ordinal, class Node>
00695   DefaultDeviceSparseOps<Scalar,Ordinal,Node>::DefaultDeviceSparseOps(const RCP<Node> &node)
00696   : node_(node) 
00697   {
00698     clear();
00699   }
00700 
00701   template<class Scalar, class Ordinal, class Node>
00702   DefaultDeviceSparseOps<Scalar,Ordinal,Node>::~DefaultDeviceSparseOps() {
00703   }
00704 
00705   template <class Scalar, class Ordinal, class Node>
00706   RCP<Node> DefaultDeviceSparseOps<Scalar,Ordinal,Node>::getNode() const { 
00707     return node_; 
00708   }
00709 
00710   template <class Scalar, class Ordinal, class Node>
00711   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::clear() {
00712     pbuf_offsets1D_  = null;
00713     pbuf_inds1D_     = null;
00714     pbuf_vals1D_     = null;
00715     numRows_  = 0;
00716     indsInit_ = false;
00717     valsInit_ = false;
00718     isEmpty_  = false;
00719   }
00720 
00721   template <class Scalar, class Ordinal, class Node>
00722   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::initializeStructure(const CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &graph) {
00723     TEST_FOR_EXCEPTION(indsInit_ == true || valsInit_ == true, std::runtime_error, 
00724         Teuchos::typeName(*this) << "::initializeStructure(): structure already initialized.");
00725     numRows_ = graph.getNumRows();
00726     if (graph.isEmpty() || numRows_ == 0) {
00727       isEmpty_ = true;
00728     }
00729     else {
00730       ArrayRCP<Ordinal> inds;
00731       ArrayRCP<size_t > offs;
00732       const_cast<CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &>(graph).getDeviceBuffers(inds, offs);
00733       pbuf_inds1D_    = inds;
00734       pbuf_offsets1D_ = offs;
00735     }
00736     indsInit_ = true;
00737   }
00738 
00739   template <class Scalar, class Ordinal, class Node>
00740   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::initializeValues(const CrsMatrixDeviceCompute<Scalar,Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &matrix) {
00741     TEST_FOR_EXCEPTION(indsInit_ == false, std::runtime_error, 
00742         Teuchos::typeName(*this) << "::initializeValues(): must initialize values after graph.");
00743     TEST_FOR_EXCEPTION(numRows_ != matrix.getNumRows() || isEmpty_ != matrix.isEmpty(), std::runtime_error,
00744         Teuchos::typeName(*this) << "::initializeValues(): matrix not compatible with previously supplied graph.");
00745     if (!isEmpty_) {
00746       ArrayRCP<Scalar> vals;
00747       const_cast<CrsMatrixDeviceCompute<Scalar,Ordinal,Node,DefaultDeviceSparseOps<void,Ordinal,Node> > &>(matrix).getDeviceBuffer(vals);
00748       pbuf_vals1D_ = vals;
00749     }
00750     valsInit_ = true;
00751   }
00752 
00753 
00754   template <class Scalar, class Ordinal, class Node>
00755   template <class DomainScalar, class RangeScalar>
00756   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::solve(
00757                       Teuchos::ETransp trans, Teuchos::EUplo uplo, Teuchos::EDiag diag, 
00758                       const MultiVector<DomainScalar,Node> &Y,
00759                             MultiVector<RangeScalar,Node> &X) const {
00760     typedef DefaultSparseSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1D;
00761     typedef DefaultSparseTransposeSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  TOp1D;
00762     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00763         Teuchos::typeName(*this) << "::solve(): this solve was not fully initialized.");
00764     TEST_FOR_EXCEPTION(X.getNumCols() != Y.getNumCols(), std::runtime_error,
00765         Teuchos::typeName(*this) << "::solve(): Left hand side and right hand side multivectors have differing numbers of vectors.");
00766     TEST_FOR_EXCEPTION(X.getNumRows() < numRows_, std::runtime_error,
00767         Teuchos::typeName(*this) << "::solve(): Left-hand-side multivector does not have enough rows. Likely cause is that the column map was not provided to the Tpetra::CrsMatrix in the case of an implicit unit diagonal.");
00768 
00769     ReadyBufferHelper<Node> rbh(node_);
00770     if (numRows_ == 0) {
00771       // null op
00772     }
00773     else if (isEmpty_) {
00774       TEST_FOR_EXCEPTION(diag != Teuchos::UNIT_DIAG, std::runtime_error,
00775           Teuchos::typeName(*this) << "::solve(): solve of empty matrix only valid for an implicit unit diagonal.");
00776       // solve I * X = Y for X = Y
00777       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00778     }
00779     else {
00780       if (trans == Teuchos::NO_TRANS) {
00781         Op1D wdp;
00782         rbh.begin();
00783         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00784         wdp.ends    = wdp.begs+1;
00785         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00786         wdp.vals    = rbh.template addConstBuffer< Scalar>(pbuf_vals1D_);
00787         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00788         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00789         rbh.end();
00790         wdp.numRows = numRows_;
00791         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00792         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00793         wdp.xstride = X.getStride();
00794         wdp.ystride = Y.getStride();
00795         const size_t numRHS = X.getNumCols();
00796         node_->template parallel_for<Op1D>(0,numRHS,wdp);
00797       }
00798       else {
00799         TOp1D wdp;
00800         rbh.begin();
00801         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00802         wdp.ends    = wdp.begs+1;
00803         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00804         wdp.vals    = rbh.template addConstBuffer< Scalar>(pbuf_vals1D_);
00805         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00806         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00807         rbh.end();
00808         wdp.numRows = numRows_;
00809         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG ? true : false);
00810         wdp.upper    = (uplo == Teuchos::UPPER_TRI ? true : false);
00811         wdp.xstride = X.getStride();
00812         wdp.ystride = Y.getStride();
00813         const size_t numRHS = X.getNumCols();
00814         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00815       }
00816     }
00817     return;
00818   }
00819 
00820 
00821   template <class Scalar, class Ordinal, class Node>
00822   template <class DomainScalar, class RangeScalar>
00823   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::multiply(
00824                                 Teuchos::ETransp trans, 
00825                                 RangeScalar alpha,
00826                                 const MultiVector<DomainScalar,Node> &X, 
00827                                 MultiVector<RangeScalar,Node> &Y) const {
00828     // the 1 parameter to the template means that beta is ignored and the output multivector enjoys overwrite semantics
00829     typedef DefaultSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 1>  Op1D;
00830     typedef DefaultSparseTransposeMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 1> TOp1D;
00831     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00832         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00833     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00834     ReadyBufferHelper<Node> rbh(node_);
00835     if (isEmpty_ == true) {
00836       // Y <= 0 * X
00837       //   <= 0
00838       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00839     }
00840     else {
00841       if (trans == Teuchos::NO_TRANS) {
00842         Op1D wdp;
00843         rbh.begin();
00844         wdp.alpha   = alpha;
00845         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00846         wdp.numRows = numRows_;
00847         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00848         wdp.ends    = wdp.begs+1;
00849         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00850         wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00851         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00852         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00853         wdp.xstride = X.getStride();
00854         wdp.ystride = Y.getStride();
00855         rbh.end();
00856         const size_t numRHS = X.getNumCols();
00857         node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00858       }
00859       else {
00860         TOp1D wdp;
00861         rbh.begin();
00862         wdp.alpha   = alpha;
00863         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00864         wdp.numRows = numRows_;
00865         wdp.numCols = Y.getNumRows();
00866         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00867         wdp.ends    = wdp.begs+1;
00868         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00869         wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00870         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00871         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00872         wdp.xstride = X.getStride();
00873         wdp.ystride = Y.getStride();
00874         rbh.end();
00875         const size_t numRHS = X.getNumCols();
00876         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00877       }
00878     }
00879     return;
00880   }
00881 
00882 
00883   template <class Scalar, class Ordinal, class Node>
00884   template <class DomainScalar, class RangeScalar>
00885   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::multiply(
00886                                 Teuchos::ETransp trans, 
00887                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, 
00888                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const {
00889     // the 0 parameter means that beta is considered, and the output multivector enjoys accumulate semantics
00890     typedef DefaultSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 0>  Op1D;
00891     typedef DefaultSparseTransposeMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar, 0> TOp1D;
00892     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00893         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00894     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00895     ReadyBufferHelper<Node> rbh(node_);
00896     if (isEmpty_ == true) {
00897       // Y <= alpha * 0 * X + beta * Y 
00898       //   <= beta * Y
00899       // NOTE: this neglects NaNs in X, which don't satisfy 0*NaN == 0.
00900       //       However, X and Y may have different sizes, and we would need the matrix entries to know how NaNs in X should propagate into Y;
00901       //       the best we can do is therefore scale Y by beta. Assigning Y = 0 outright would destroy NaNs already present in Y, violating the semantics of the call.
00902       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
00903     }
00904     else {
00905       if (trans == Teuchos::NO_TRANS) {
00906         Op1D wdp;
00907         rbh.begin();
00908         wdp.alpha   = alpha;
00909         wdp.beta    = beta;
00910         wdp.numRows = numRows_;
00911         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00912         wdp.ends    = wdp.begs+1;
00913         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00914         wdp.vals    = rbh.template addConstBuffer< Scalar>(pbuf_vals1D_);
00915         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00916         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00917         wdp.xstride = X.getStride();
00918         wdp.ystride = Y.getStride();
00919         rbh.end();
00920         const size_t numRHS = X.getNumCols();
00921         node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00922       }
00923       else {
00924         TOp1D wdp;
00925         rbh.begin();
00926         wdp.alpha   = alpha;
00927         wdp.beta    = beta;
00928         wdp.numRows = numRows_;
00929         wdp.numCols = Y.getNumRows();
00930         wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00931         wdp.ends    = wdp.begs+1;
00932         wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00933         wdp.vals    = rbh.template addConstBuffer< Scalar>(pbuf_vals1D_);
00934         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00935         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00936         wdp.xstride = X.getStride();
00937         wdp.ystride = Y.getStride();
00938         rbh.end();
00939         const size_t numRHS = X.getNumCols();
00940         node_->template parallel_for<TOp1D>(0,numRHS,wdp);
00941       }
00942     }
00943     return;
00944   }
00945 
00946 
00947 
00948 
00949 
00950   template <class Scalar, class Ordinal, class Node>
00951   template <class VectorScalar>
00952   void DefaultDeviceSparseOps<Scalar,Ordinal,Node>::leftScale(const MultiVector<VectorScalar,Node> &X)
00953   {
00954     typedef DefaultSparseScaleOp1<Scalar,Ordinal,VectorScalar>  Op1D;
00955     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00956         Teuchos::typeName(*this) << "::leftScale(): operation not fully initialized.");
00957     TEST_FOR_EXCEPT(X.getNumCols() != 1);
00958     ReadyBufferHelper<Node> rbh(node_);
00959 
00960 
00961     Op1D wdp;
00962     rbh.begin();
00963     wdp.numRows = numRows_;
00964     wdp.begs    = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00965     wdp.ends    = wdp.begs+1;
00966     wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00967     wdp.vals    = rbh.template addNonConstBuffer<Scalar>(pbuf_vals1D_);
00968     wdp.x       = rbh.template addConstBuffer<VectorScalar>(X.getValues());
00969     rbh.end();
00970     const size_t numRHS = X.getNumCols();
00971     node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00972     
00973     return;
00974   }
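  // Editor's note: the check above requires X to have exactly one column, so
  // numRHS is always 1 and the iteration count numRows_*numRHS reduces to
  // numRows_, matching the host implementation of leftScale().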
00975 
00976 
00977 
00978 
00979 
00984 } // namespace Kokkos
00985 
00986 #endif /* KOKKOS_DEFAULTSPARSEOPS_HPP */
00987 