Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultSparseOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTSPARSEOPS_HPP
00043 #define KOKKOS_DEFAULTSPARSEOPS_HPP
00044 
00045 #include <Teuchos_DataAccess.hpp>
00046 #include <Teuchos_CompileTimeAssert.hpp>
00047 #include <stdexcept>
00048 
00049 #include "Kokkos_ConfigDefs.hpp"
00050 #include "Kokkos_CrsMatrixBase.hpp"
00051 #include "Kokkos_CrsGraphBase.hpp"
00052 
00053 #include "Kokkos_MultiVector.hpp"
00054 #include "Kokkos_NodeHelpers.hpp"
00055 #include "Kokkos_DefaultArithmetic.hpp"
00056 #include "Kokkos_DefaultSparseSolveKernelOps.hpp"
00057 #include "Kokkos_DefaultSparseMultiplyKernelOps.hpp"
00058 
00059 namespace Kokkos {
00060 
00062 
00064   template <class Ordinal,
00065             class Node>
00066   class DefaultCrsGraph : public CrsGraphBase<Ordinal,Node>
00067   {
00068     public:
00069       DefaultCrsGraph(size_t numRows, const RCP<Node> &node, const RCP<ParameterList> &params);
00070       bool isEmpty() const;
00071       void setStructure(const ArrayRCP<const size_t>  &ptrs,
00072                         const ArrayRCP<const Ordinal> &inds);
00073       inline ArrayRCP<const size_t> getPointers() const;
00074       inline ArrayRCP<const Ordinal> getIndices() const;
00075       inline bool isInitialized() const;
00076       inline void setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag);
00077       inline void getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const;
00078     private:
00079       ArrayRCP<const size_t>  ptrs_;
00080       ArrayRCP<const Ordinal> inds_;
00081       bool isInitialized_;
00082       bool isEmpty_;
00083       Teuchos::EUplo  tri_uplo_;
00084       Teuchos::EDiag unit_diag_;
00085   };
00086 
00088 
00090   template <class Scalar,
00091             class Ordinal,
00092             class Node>
00093   class DefaultCrsMatrix : public CrsMatrixBase<Scalar,Ordinal,Node>
00094   {
00095     public:
00096       DefaultCrsMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params);
00097       void setValues(const ArrayRCP<const Scalar> &vals);
00098       inline ArrayRCP<const Scalar> getValues() const;
00099       inline bool isInitialized() const;
00100     private:
00101       ArrayRCP<const Scalar> vals_;
00102       bool isInitialized_;
00103   };
00104 
00105   template <class Ordinal, class Node>
00106   DefaultCrsGraph<Ordinal,Node>::DefaultCrsGraph(size_t numRows, const RCP<Node> &node, const RCP<ParameterList> &params)
00107   : CrsGraphBase<Ordinal,Node>(numRows,node,params)
00108   , isInitialized_(false)
00109   , isEmpty_(false)
00110   {
00111     // Make sure that users only specialize for Kokkos Node types that are host Nodes (vs. device Nodes, such as GPU Nodes)
00112     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00113   }
00114 
00115   template <class Ordinal, class Node>
00116   bool DefaultCrsGraph<Ordinal,Node>::isEmpty() const
00117   { return isEmpty_; }
00118 
00119   template <class Ordinal, class Node>
00120   void DefaultCrsGraph<Ordinal,Node>::setStructure(
00121                       const ArrayRCP<const size_t>  &ptrs,
00122                       const ArrayRCP<const Ordinal> &inds)
00123   {
00124     std::string tfecfFuncName("setStructure(ptrs,inds)");
00125     const size_t numrows = this->getNumRows();
00126 
00127     // mfh 19 June 2012: The tests expect std::runtime_error rather
00128     // than the arguably more appropriate std::invalid_argument, so
00129     // I'll throw std::runtime_error here.  Ditto for the other checks
00130     // below.
00131     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00132       ptrs.is_null (),
00133       std::runtime_error,
00134       ": The input array 'ptrs' must be nonnull, even for a matrix with zero "
00135       "rows.");
00136 
00137     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00138       (size_t) ptrs.size() != numrows+1,
00139       std::runtime_error,
00140       ": Graph input data are not coherent:\n"
00141       "-- ptrs.size() = " << ptrs.size() << " != numrows+1 = "
00142       << (numrows+1) << ".");
00143 
00144     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00145       ptrs[0] != 0,
00146       std::runtime_error,
00147       ": Graph input data are not coherent:\n"
00148       "-- ptrs[0] = " << ptrs[0] << " != 0.");
00149 
00150     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00151       (size_t) inds.size() != ptrs[numrows],
00152       std::runtime_error,
00153       ": Graph input data are not coherent:\n"
00154       "-- inds.size() = " << inds.size() << " != ptrs[numrows="
00155       << numrows << "] = " << ptrs[numrows] << ".");
00156 
00157     const size_t numEntries = ptrs[numrows];
00158     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00159       isInitialized_,
00160       std::runtime_error,
00161       " matrix has already been initialized."
00162     )
00163     if (numrows == 0 || numEntries == 0) isEmpty_ = true;
00164     ptrs_ = ptrs;
00165     inds_ = inds;
00166     isInitialized_ = true;
00167   }
00168 
00169   template <class Ordinal, class Node>
00170   ArrayRCP<const size_t> DefaultCrsGraph<Ordinal,Node>::getPointers() const
00171   { return ptrs_; }
00172 
00173   template <class Ordinal, class Node>
00174   ArrayRCP<const Ordinal> DefaultCrsGraph<Ordinal,Node>::getIndices() const
00175   { return inds_; }
00176 
00177 
00178   template <class Ordinal, class Node>
00179   bool DefaultCrsGraph<Ordinal,Node>::isInitialized() const
00180   { return isInitialized_; }
00181 
00182   template <class Ordinal, class Node>
00183   void DefaultCrsGraph<Ordinal,Node>::setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag)
00184   {
00185     tri_uplo_ = uplo;
00186     unit_diag_ = diag;
00187   }
00188 
00189   template <class Ordinal, class Node>
00190   void DefaultCrsGraph<Ordinal,Node>::getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const
00191   {
00192     uplo = tri_uplo_;
00193     diag = unit_diag_;
00194   }
00195 
00196   template <class Scalar, class Ordinal, class Node>
00197   DefaultCrsMatrix<Scalar,Ordinal,Node>::DefaultCrsMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params)
00198   : CrsMatrixBase<Scalar,Ordinal,Node>(graph,params)
00199   , isInitialized_(false)
00200   {
00201     // Make sure that users only specialize for Kokkos Node types that are host Nodes (vs. device Nodes, such as GPU Nodes)
00202     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00203   }
00204 
00205   template <class Scalar, class Ordinal, class Node>
00206   void DefaultCrsMatrix<Scalar,Ordinal,Node>::setValues(const ArrayRCP<const Scalar> &vals)
00207   {
00208     std::string tfecfFuncName("setValues(vals)");
00209     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00210         isInitialized_ == true,
00211         std::runtime_error, " matrix is already initialized."
00212     )
00213     vals_ = vals;
00214     isInitialized_ = true;
00215   }
00216 
00217   template <class Scalar, class Ordinal, class Node>
00218   ArrayRCP<const Scalar> DefaultCrsMatrix<Scalar,Ordinal,Node>::getValues() const
00219   { return vals_; }
00220 
00221   template <class Scalar, class Ordinal, class Node>
00222   bool DefaultCrsMatrix<Scalar,Ordinal,Node>::isInitialized() const
00223   {
00224     return isInitialized_;
00225   }
00226 
00235   template <class Scalar, class Ordinal, class Node>
00236   class DefaultHostSparseOps {
00237   public:
00239 
00240 
00242     typedef Scalar  scalar_type;
00244     typedef Ordinal ordinal_type;
00246     typedef Node    node_type;
00248     typedef DefaultHostSparseOps<Scalar,Ordinal,Node> sparse_ops_type;
00249 
00251     template <class O, class N>
00252     struct graph {
00253       typedef DefaultCrsGraph<O,N> graph_type;
00254     };
00255 
00257     template <class S, class O, class N>
00258     struct matrix {
00259       typedef DefaultCrsMatrix<S,O,N> matrix_type;
00260     };
00261 
00274     template <class S2>
00275     struct bind_scalar {
00276       typedef DefaultHostSparseOps<S2,Ordinal,Node> other_type;
00277     };
00278 
00280 
00281 
00282 
00284     DefaultHostSparseOps(const RCP<Node> &node);
00285 
00287     ~DefaultHostSparseOps();
00288 
00290 
00291 
00292 
00294     RCP<Node> getNode() const;
00295 
00297 
00299 
00300 
00302     static ArrayRCP<size_t> allocRowPtrs(const RCP<Node> &node, const ArrayView<const size_t> &numEntriesPerRow);
00303 
00305     template <class T>
00306     static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs);
00307 
00309     static void finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params);
00310 
00312     static void finalizeMatrix(const DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00313 
00315     static void finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00316 
00325     void setGraphAndMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph,
00326                            const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &matrix);
00327 
00329 
00330 
00331 
00357     template <class DomainScalar, class RangeScalar>
00358     void
00359     multiply (Teuchos::ETransp trans,
00360               RangeScalar alpha,
00361               const MultiVector<DomainScalar,Node> &X,
00362               MultiVector<RangeScalar,Node> &Y) const;
00363 
00393     template <class DomainScalar, class RangeScalar>
00394     void
00395     multiply (Teuchos::ETransp trans,
00396               RangeScalar alpha,
00397               const MultiVector<DomainScalar,Node> &X,
00398               RangeScalar beta,
00399               MultiVector<RangeScalar,Node> &Y) const;
00400 
00421     template <class DomainScalar, class RangeScalar>
00422     void
00423     solve (Teuchos::ETransp trans,
00424            const MultiVector<DomainScalar,Node> &Y,
00425            MultiVector<RangeScalar,Node> &X) const;
00426 
00428 
00429   protected:
00431     DefaultHostSparseOps(const DefaultHostSparseOps& source);
00432 
00434     RCP<Node> node_;
00435 
00436     // we do this one of two ways:
00437     // packed CRS: array of row pointers, array of indices, array of values.
00438     ArrayRCP<const Ordinal> inds_;
00439     ArrayRCP<const size_t>  ptrs_;
00440     ArrayRCP<const Scalar>  vals_;
00441 
00442     Teuchos::EUplo  tri_uplo_;
00443     Teuchos::EDiag unit_diag_;
00444 
00445     size_t numRows_;
00446     bool isInitialized_;
00447     bool isEmpty_;
00448   };
00449 
00450 
00451   template <class Scalar, class Ordinal, class Node>
00452   void DefaultHostSparseOps<Scalar,Ordinal,Node>::finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params)
00453   {
00454     // nothing much to do here
00455     graph.setMatDesc(uplo,diag);
00456     std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeGraph(graph,params)");
00457     TEUCHOS_TEST_FOR_EXCEPTION(
00458         graph.isInitialized() == false,
00459         std::runtime_error, FuncName << ": graph has not yet been initialized."
00460     )
00461   }
00462 
00463   template <class Scalar, class Ordinal, class Node>
00464   void DefaultHostSparseOps<Scalar,Ordinal,Node>::finalizeMatrix(const DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params)
00465   {
00466     // nothing much to do here
00467     std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeMatrix(graph,matrix,params)");
00468     TEUCHOS_TEST_FOR_EXCEPTION(
00469         matrix.isInitialized() == false,
00470         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00471     )
00472   }
00473 
00474   template <class Scalar, class Ordinal, class Node>
00475   void DefaultHostSparseOps<Scalar,Ordinal,Node>::finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params)
00476   {
00477     // nothing much to do here
00478     graph.setMatDesc(uplo,diag);
00479     std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeGraphAndMatrix(graph,matrix,params)");
00480     TEUCHOS_TEST_FOR_EXCEPTION(
00481         graph.isInitialized() == false,
00482         std::runtime_error, FuncName << ": graph has not yet been initialized."
00483     )
00484     TEUCHOS_TEST_FOR_EXCEPTION(
00485         matrix.isInitialized() == false,
00486         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00487     )
00488   }
00489 
00490 
00491   template<class Scalar, class Ordinal, class Node>
00492   DefaultHostSparseOps<Scalar,Ordinal,Node>::DefaultHostSparseOps(const RCP<Node> &node)
00493   : node_(node)
00494   , numRows_(0)
00495   , isInitialized_(false)
00496   , isEmpty_(false)
00497   {
00498     // Make sure that users only specialize DefaultHostSparseOps for
00499     // Kokkos Node types that are host Nodes (vs. device Nodes, such
00500     // as GPU Nodes).
00501     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00502   }
00503 
00504   template<class Scalar, class Ordinal, class Node>
00505   DefaultHostSparseOps<Scalar,Ordinal,Node>::~DefaultHostSparseOps() {
00506   }
00507 
00508   template <class Scalar, class Ordinal, class Node>
00509   RCP<Node> DefaultHostSparseOps<Scalar,Ordinal,Node>::getNode() const {
00510     return node_;
00511   }
00512 
00513   template <class Scalar, class Ordinal, class Node>
00514   void DefaultHostSparseOps<Scalar,Ordinal,Node>::setGraphAndMatrix(
00515               const RCP<const DefaultCrsGraph<Ordinal,Node> > &opgraph,
00516               const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &opmatrix)
00517   {
00518     std::string tfecfFuncName("setGraphAndMatrix(uplo,diag,graph,matrix)");
00519     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00520         isInitialized_ == true,
00521         std::runtime_error, " operators already initialized.");
00522     numRows_ = opgraph->getNumRows();
00523     if (opgraph->isEmpty() || numRows_ == 0) {
00524       isEmpty_ = true;
00525     }
00526     else {
00527       isEmpty_ = false;
00528       ptrs_ = opgraph->getPointers();
00529       inds_ = opgraph->getIndices();
00530       vals_ = opmatrix->getValues();
00531       // these checks just about the most that we can perform
00532       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00533           (size_t)ptrs_.size() != numRows_+1
00534           || inds_.size() != vals_.size(),
00535           std::runtime_error, " matrix and graph seem incongruent.");
00536     }
00537     opgraph->getMatDesc( tri_uplo_, unit_diag_ );
00538     isInitialized_ = true;
00539   }
00540 
00541   template <class Scalar, class Ordinal, class Node>
00542   template <class DomainScalar, class RangeScalar>
00543   void DefaultHostSparseOps<Scalar,Ordinal,Node>::solve(Teuchos::ETransp trans,
00544                                                         const MultiVector<DomainScalar,Node> &Y,
00545                                                               MultiVector< RangeScalar,Node> &X) const
00546   {
00547     std::string tfecfFuncName("solve(trans,Y,X)");
00548     typedef DefaultSparseSolveOp<Scalar,Ordinal,DomainScalar,RangeScalar>            Op;
00549     typedef DefaultSparseTransposeSolveOp<Scalar,Ordinal,DomainScalar,RangeScalar>  TOp;
00550     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00551         isInitialized_ == false,
00552         std::runtime_error, " this solve was not fully initialized."
00553     );
00554     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00555         X.getNumCols() != Y.getNumCols(),
00556         std::runtime_error, " Left hand side and right hand side multivectors have differing numbers of vectors."
00557     );
00558     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00559         X.getNumRows() < numRows_,
00560         std::runtime_error, " Left-hand-side multivector does not have enough rows. "
00561                             "Likely cause is that the column map was not provided to "
00562                             "the Tpetra::CrsMatrix in the case of an implicit unit diagonal."
00563     );
00564 
00565     ReadyBufferHelper<Node> rbh(node_);
00566     if (numRows_ == 0) {
00567       // null op
00568     }
00569     else if (isEmpty_) {
00570       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(unit_diag_ != Teuchos::UNIT_DIAG, std::runtime_error,
00571           " solve of empty matrix only valid for an implicit unit diagonal.");
00572       // solve I * X = Y for X = Y
00573       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00574     }
00575     else {
00576       if (trans == Teuchos::NO_TRANS) {
00577         Op wdp;
00578         rbh.begin();
00579         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00580         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00581         wdp.ptrs    = rbh.template addConstBuffer<     size_t>(ptrs_);
00582         wdp.inds    = rbh.template addConstBuffer<    Ordinal>(inds_);
00583         wdp.vals    = rbh.template addConstBuffer<     Scalar>(vals_);
00584         rbh.end();
00585         wdp.numRows = numRows_;
00586         wdp.unitDiag = (unit_diag_ == Teuchos::UNIT_DIAG ? true : false);
00587         wdp.upper    = ( tri_uplo_ == Teuchos::UPPER_TRI ? true : false);
00588         wdp.xstride = X.getStride();
00589         wdp.ystride = Y.getStride();
00590         wdp.numRHS  = X.getNumCols();
00591         // no parallel for you
00592         wdp.execute();
00593       }
00594       else {
00595         TOp wdp;
00596         rbh.begin();
00597         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00598         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00599         wdp.ptrs    = rbh.template addConstBuffer<     size_t>(ptrs_);
00600         wdp.inds    = rbh.template addConstBuffer<    Ordinal>(inds_);
00601         wdp.vals    = rbh.template addConstBuffer<     Scalar>(vals_);
00602         rbh.end();
00603         wdp.numRows = numRows_;
00604         wdp.unitDiag = (unit_diag_ == Teuchos::UNIT_DIAG ? true : false);
00605         wdp.upper    = ( tri_uplo_ == Teuchos::UPPER_TRI ? true : false);
00606         wdp.xstride = X.getStride();
00607         wdp.ystride = Y.getStride();
00608         wdp.numRHS  = X.getNumCols();
00609         // no parallel for you
00610         wdp.execute();
00611       }
00612     }
00613     return;
00614   }
00615 
00616 
00617   template <class Scalar, class Ordinal, class Node>
00618   template <class DomainScalar, class RangeScalar>
00619   void DefaultHostSparseOps<Scalar,Ordinal,Node>::multiply(
00620                                 Teuchos::ETransp trans,
00621                                 RangeScalar alpha,
00622                                 const MultiVector<DomainScalar,Node> &X,
00623                                       MultiVector<RangeScalar ,Node> &Y) const
00624   {
00625     std::string tfecfFuncName("multiply(trans,alpha,X,Y)");
00626     // the 1 template parameter below means that beta is not used in computations
00627     // and the output multivector enjoys overwrite semantics (i.e., will overwrite data/NaNs in Y)
00628     typedef DefaultSparseMultiplyOp<         Scalar,Ordinal,DomainScalar,RangeScalar, 1>  Op;
00629     typedef DefaultSparseTransposeMultiplyOp<Scalar,Ordinal,DomainScalar,RangeScalar, 1> TOp;
00630     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00631         isInitialized_ == false,
00632         std::runtime_error, " sparse ops not initialized.");
00633     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00634         X.getNumCols() != Y.getNumCols(),
00635         std::runtime_error, " X and Y do not have the same number of columns.");
00636     ReadyBufferHelper<Node> rbh(node_);
00637     if (isEmpty_ == true) {
00638       // Y <= 0 * X
00639       //   <= 0
00640       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00641     }
00642     else {
00643       if (trans == Teuchos::NO_TRANS) {
00644         Op wdp;
00645         rbh.begin();
00646         wdp.alpha   = alpha;
00647         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00648         wdp.numRows = numRows_;
00649         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00650         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00651         wdp.ptrs    = rbh.template addConstBuffer<      size_t>(ptrs_);
00652         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
00653         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
00654         wdp.xstride = X.getStride();
00655         wdp.ystride = Y.getStride();
00656         wdp.numRHS  = X.getNumCols();
00657         rbh.end();
00658         node_->template parallel_for<Op>(0,numRows_,wdp);
00659       }
00660       else {
00661         TOp wdp;
00662         rbh.begin();
00663         wdp.alpha   = alpha;
00664         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00665         wdp.numRows = numRows_;
00666         wdp.numCols = Y.getNumRows();
00667         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00668         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00669         wdp.ptrs    = rbh.template addConstBuffer<      size_t>(ptrs_);
00670         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
00671         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
00672         wdp.xstride = X.getStride();
00673         wdp.ystride = Y.getStride();
00674         wdp.numRHS  = X.getNumCols();
00675         rbh.end();
00676         // no parallel for you
00677         wdp.execute();
00678       }
00679     }
00680     return;
00681   }
00682 
00683 
00684   template <class Scalar, class Ordinal, class Node>
00685   template <class DomainScalar, class RangeScalar>
00686   void DefaultHostSparseOps<Scalar,Ordinal,Node>::multiply(
00687                                 Teuchos::ETransp trans,
00688                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X,
00689                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const
00690   {
00691     std::string tfecfFuncName("multiply(trans,alpha,X,beta,Y)");
00692     // the 0 template parameter below means that beta is used in computations
00693     // and the output multivector enjoys accumulation semantics (i.e., will not overwrite data/NaNs in Y)
00694     typedef DefaultSparseMultiplyOp<         Scalar,Ordinal,DomainScalar,RangeScalar, 0>  Op;
00695     typedef DefaultSparseTransposeMultiplyOp<Scalar,Ordinal,DomainScalar,RangeScalar, 0> TOp;
00696     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00697         isInitialized_ == false,
00698         std::runtime_error, " sparse ops not initialized.");
00699     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00700         X.getNumCols() != Y.getNumCols(),
00701         std::runtime_error, " X and Y do not have the same number of columns.");
00702     ReadyBufferHelper<Node> rbh(node_);
00703     if (isEmpty_ == true) {
00704       // Y <= alpha * 0 * X + beta * Y
00705       //   <= beta * Y
00706       // NOTE: this neglects NaNs in X, which don't satisfy 0*NaN == 0
00707       //       however, X and Y may be of different size, and we need entries to determine how to mix those potential NaNs in X into Y
00708       //       therefore, the best we can do is scale Y to zero. Setting Y to zero would destroy NaNs in Y, which violates the semantics of the call.
00709       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
00710     }
00711     else {
00712       if (trans == Teuchos::NO_TRANS) {
00713         Op wdp;
00714         rbh.begin();
00715         wdp.alpha   = alpha;
00716         wdp.beta    = beta;
00717         wdp.numRows = numRows_;
00718         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00719         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00720         wdp.ptrs    = rbh.template addConstBuffer<      size_t>(ptrs_);
00721         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
00722         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
00723         wdp.xstride = X.getStride();
00724         wdp.ystride = Y.getStride();
00725         wdp.numRHS  = X.getNumCols();
00726         rbh.end();
00727         node_->template parallel_for<Op>(0,numRows_,wdp);
00728       }
00729       else {
00730         TOp wdp;
00731         rbh.begin();
00732         wdp.alpha   = alpha;
00733         wdp.beta    = beta;
00734         wdp.numRows = numRows_;
00735         wdp.numCols = Y.getNumRows();
00736         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00737         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00738         wdp.ptrs    = rbh.template addConstBuffer<      size_t>(ptrs_);
00739         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
00740         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
00741         wdp.xstride = X.getStride();
00742         wdp.ystride = Y.getStride();
00743         wdp.numRHS  = X.getNumCols();
00744         rbh.end();
00745         // no parallel for you
00746         wdp.execute();
00747       }
00748     }
00749     return;
00750   }
00751 
00752 
00753   // ======= pointer allocation ===========
00754   template <class Scalar, class Ordinal, class Node>
00755   ArrayRCP<size_t>
00756   DefaultHostSparseOps<Scalar,Ordinal,Node>::allocRowPtrs(const RCP<Node> &/*node*/, const ArrayView<const size_t> &numEntriesPerRow)
00757   {
00758     ArrayRCP<size_t> ptrs = arcp<size_t>( numEntriesPerRow.size() + 1 );
00759     ptrs[0] = 0;
00760     std::partial_sum( numEntriesPerRow.getRawPtr(), numEntriesPerRow.getRawPtr()+numEntriesPerRow.size(), ptrs.begin()+1 );
00761     return ptrs;
00762   }
00763 
00764   // ======= other allocation ===========
00765   template <class Scalar, class Ordinal, class Node>
00766   template <class T>
00767   ArrayRCP<T>
00768   DefaultHostSparseOps<Scalar,Ordinal,Node>::allocStorage(const RCP<Node> &/*node*/, const ArrayView<const size_t> &rowPtrs)
00769   {
00770     const size_t totalNumEntries = *(rowPtrs.end()-1);
00771     // alloc data
00772     ArrayRCP<T> vals;
00773     if (totalNumEntries > 0) vals = arcp<T>(totalNumEntries);
00774     std::fill( vals.begin(), vals.end(), Teuchos::ScalarTraits<T>::zero() );
00775     return vals;
00776   }
00777 
00778 
00783 } // namespace Kokkos
00784 
00785 #endif /* KOKKOS_DEFAULTSPARSEOPS_HPP */
00786 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends