Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CuspOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_CUSPOPS_HPP
00043 #define KOKKOS_CUSPOPS_HPP
00044 
00045 #include <Teuchos_DataAccess.hpp>
00046 #include <Teuchos_CompileTimeAssert.hpp>
00047 #include <Teuchos_TypeTraits.hpp>
00048 #include <Teuchos_BLAS_types.hpp>
00049 
00050 #include <Kokkos_ConfigDefs.hpp>
00051 #include <Kokkos_CUDANodeUtils.hpp>
00052 
00053 #include "Kokkos_CrsMatrixBase.hpp"
00054 #include "Kokkos_CrsGraphBase.hpp"
00055 #include "Kokkos_MultiVector.hpp"
00056 #include "Kokkos_NodeHelpers.hpp"
00057 #include "Kokkos_CuspWrappers.hpp"
00058 
00059 namespace Kokkos {
00060 
00061 
00063 
00065   template <class Ordinal, 
00066             class Node>
00067   class CuspCrsGraph : public CrsGraphBase<Ordinal,Node>
00068   {
00069     public:
00070       CuspCrsGraph(Ordinal numRows, Ordinal numCols, const RCP<Node> &node, const RCP<ParameterList> &params);
00071       bool isEmpty() const;
00072       void setStructure(const ArrayRCP<const size_t>  &ptrs,
00073                         const ArrayRCP<const Ordinal> &inds);
00074       void setDeviceData(const ArrayRCP<const Ordinal> &devptrs, const ArrayRCP<const Ordinal> &devinds);
00075       inline ArrayRCP<const size_t>  getPointers() const;
00076       inline ArrayRCP<const Ordinal> getIndices() const;
00077       inline ArrayRCP<const Ordinal> getDevPointers() const;
00078       inline ArrayRCP<const Ordinal> getDevIndices() const;
00079       inline bool isInitialized() const;
00080       void setMatDesc(Teuchos::EUplo  uplo, Teuchos::EDiag  diag);
00081       void getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const;
00082     private:
00083       bool isInitialized_;
00084       bool isEmpty_;
00085       Teuchos::EUplo uplo_;
00086       Teuchos::EDiag diag_;
00087       // graph data
00088       ArrayRCP<const size_t> host_rowptrs_;
00089       ArrayRCP<const Ordinal> host_colinds_, dev_colinds_;
00090       ArrayRCP<const Ordinal> dev_rowptrs_;
00091   };
00092 
00094 
00096   template <class Scalar,
00097             class Ordinal,
00098             class Node>
00099   class CuspCrsMatrix : public CrsMatrixBase<Scalar,Ordinal,Node>
00100   {
00101     public:
00102       CuspCrsMatrix(const RCP<const CuspCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params);
00103       void setValues(const ArrayRCP<const Scalar> &vals);
00104       void setDeviceData(const ArrayRCP<const Scalar> &devvals);
00105       void setDeviceDataTrans(const ArrayRCP<const Ordinal> &devptrs, const ArrayRCP<const Ordinal> &devinds, const ArrayRCP<const Scalar> &devvals);
00106       inline ArrayRCP<const Scalar> getValues() const;
00107       inline ArrayRCP<const Scalar> getDevValues() const;
00108       inline void getDeviceDataTrans(ArrayRCP<const Ordinal> &, ArrayRCP<const Ordinal> &, ArrayRCP<const Scalar> &) const;
00109       inline bool isInitialized() const;
00110     private:
00111       bool isInitialized_;
00112       // matrix data
00113       ArrayRCP<const Scalar> vals_, dev_vals_, dev_vals_t_;
00114       ArrayRCP<const Ordinal> dev_rowptrs_t_, dev_colinds_t_;
00115   };
00116 
00117   template <class Ordinal, class Node>
00118   CuspCrsGraph<Ordinal,Node>::CuspCrsGraph(Ordinal numRows, Ordinal numCols, const RCP<Node> &node, const RCP<ParameterList> &params)
00119   : CrsGraphBase<Ordinal,Node>(numRows,numCols,node,params)
00120   , isInitialized_(false)
00121   , isEmpty_(false)
00122   {
00123     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00124     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00125   }
00126 
00127   template <class Ordinal, class Node>
00128   bool CuspCrsGraph<Ordinal,Node>::isEmpty() const
00129   { return isEmpty_; }
00130 
00131   template <class Ordinal, class Node>
00132   void CuspCrsGraph<Ordinal,Node>::setStructure(const ArrayRCP<const size_t> &ptrs,
00133                                                 const ArrayRCP<const Ordinal> &inds)
00134   {
00135     std::string tfecfFuncName("setStructure(ptrs,inds)");
00136     const Ordinal numrows = this->getNumRows();
00137     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00138         (size_t)ptrs.size() != (size_t)numrows+1
00139         || ptrs[0] != 0
00140         || (size_t)inds.size() != (size_t)ptrs[numrows],
00141         std::runtime_error, " graph data not coherent."
00142     )
00143     const Ordinal numEntries = ptrs[numrows];
00144     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00145         isInitialized_ == true,
00146         std::runtime_error, " matrix has already been initialized"
00147     )
00148     if (numrows == 0 || numEntries == 0) isEmpty_ = true;
00149     host_rowptrs_ = ptrs;
00150     host_colinds_ = inds;
00151     isInitialized_ = true;
00152   }
00153 
00154   template <class Ordinal, class Node>
00155   ArrayRCP<const size_t> CuspCrsGraph<Ordinal,Node>::getPointers() const
00156   { return host_rowptrs_; }
00157 
00158   template <class Ordinal, class Node>
00159   ArrayRCP<const Ordinal> CuspCrsGraph<Ordinal,Node>::getIndices() const
00160   { return host_colinds_; }
00161 
00162   template <class Ordinal, class Node>
00163   ArrayRCP<const Ordinal> CuspCrsGraph<Ordinal,Node>::getDevPointers() const
00164   { return dev_rowptrs_; }
00165 
00166   template <class Ordinal, class Node>
00167   ArrayRCP<const Ordinal> CuspCrsGraph<Ordinal,Node>::getDevIndices() const
00168   { return dev_colinds_; }
00169 
00170   template <class Ordinal, class Node>
00171   void CuspCrsGraph<Ordinal,Node>::setDeviceData(const ArrayRCP<const Ordinal> &devptrs,
00172                                                  const ArrayRCP<const Ordinal> &devinds)
00173   { dev_rowptrs_ = devptrs; dev_colinds_ = devinds; }
00174 
00175   template <class Ordinal, class Node>
00176   bool CuspCrsGraph<Ordinal,Node>::isInitialized() const
00177   { return isInitialized_; }
00178 
00179   template <class Ordinal, class Node>
00180   void CuspCrsGraph<Ordinal,Node>::setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag)
00181   {
00182     uplo_ = uplo;
00183     diag_ = diag;
00184   }
00185 
00186   template <class Ordinal, class Node>
00187   void CuspCrsGraph<Ordinal,Node>::getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const
00188   {
00189     uplo = uplo_;
00190     diag = diag_;
00191   }
00192 
00193   template <class Scalar, class Ordinal, class Node>
00194   CuspCrsMatrix<Scalar,Ordinal,Node>::CuspCrsMatrix(const RCP<const CuspCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params)
00195   : CrsMatrixBase<Scalar,Ordinal,Node>(graph,params)
00196   , isInitialized_(false)
00197   {
00198     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00199     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00200   }
00201 
00202   template <class Scalar, class Ordinal, class Node>
00203   void CuspCrsMatrix<Scalar,Ordinal,Node>::setValues(const ArrayRCP<const Scalar> &vals)
00204   {
00205     std::string tfecfFuncName("setValues(vals)");
00206     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00207         isInitialized_ == true,
00208         std::runtime_error, " matrix is already initialized."
00209     )
00210     vals_ = vals;
00211     isInitialized_ = true;
00212   }
00213 
00214   template <class Scalar, class Ordinal, class Node>
00215   ArrayRCP<const Scalar> CuspCrsMatrix<Scalar,Ordinal,Node>::getValues() const
00216   { return vals_; }
00217 
00218   template <class Scalar, class Ordinal, class Node>
00219   bool CuspCrsMatrix<Scalar,Ordinal,Node>::isInitialized() const
00220   { return isInitialized_; }
00221 
00222   template <class Scalar, class Ordinal, class Node>
00223   ArrayRCP<const Scalar> CuspCrsMatrix<Scalar,Ordinal,Node>::getDevValues() const
00224   { return dev_vals_; }
00225 
00226   template <class Scalar, class Ordinal, class Node>
00227   void CuspCrsMatrix<Scalar,Ordinal,Node>::setDeviceData(const ArrayRCP<const Scalar> &devvals)
00228   { dev_vals_ = devvals; }
00229 
00230   template <class Scalar, class Ordinal, class Node>
00231   inline void CuspCrsMatrix<Scalar,Ordinal,Node>::getDeviceDataTrans(ArrayRCP<const Ordinal> &tptrs, 
00232                                                                      ArrayRCP<const Ordinal> &tinds, 
00233                                                                      ArrayRCP<const Scalar> &tvals) const
00234   {
00235     tptrs = dev_rowptrs_t_;  
00236     tinds = dev_colinds_t_;  
00237     tvals = dev_vals_t_;  
00238   }
00239 
00240   template <class Scalar, class Ordinal, class Node>
00241   void CuspCrsMatrix<Scalar,Ordinal,Node>::setDeviceDataTrans(const ArrayRCP<const Ordinal> &devptrs, 
00242                                                               const ArrayRCP<const Ordinal> &devinds, 
00243                                                               const ArrayRCP<const Scalar> &devvals) { 
00244     dev_rowptrs_t_ = devptrs; 
00245     dev_colinds_t_ = devinds; 
00246     dev_vals_t_    = devvals; 
00247   }
00248 
00256   template <class Scalar, class Ordinal, class Node>
00257   class CuspOps {
00258   public:
00260 
00261 
00263     typedef Scalar  scalar_type;
00265     typedef Ordinal ordinal_type;
00267     typedef Node    node_type;
00269     typedef CuspOps<Scalar,Ordinal,Node> sparse_ops_type;
00270 
00272     template <class O, class N>
00273     struct graph {
00274       typedef CuspCrsGraph<O,N> graph_type;
00275     };
00276 
00278     template <class S, class O, class N>
00279     struct matrix {
00280       typedef CuspCrsMatrix<S,O,N> matrix_type;
00281     };
00282 
00295     template <class S2>
00296     struct bind_scalar {
00297       typedef CuspOps<S2,Ordinal,Node> other_type;
00298     };
00299 
00301 
00302 
00303 
00305     CuspOps(const RCP<Node> &node);
00306 
00308     ~CuspOps();
00309 
00311 
00312 
00313 
00315     RCP<Node> getNode() const;
00316 
00318 
00320 
00321 
00323     static ArrayRCP<size_t> allocRowPtrs(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs);
00324 
00326     template <class T>
00327     static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &ptrs);
00328 
00330     static void finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, CuspCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params);
00331 
00333     static void finalizeMatrix(const CuspCrsGraph<Ordinal,Node> &graph, CuspCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00334 
00336     static void finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, CuspCrsGraph<Ordinal,Node> &graph, CuspCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00337 
00339     void setGraphAndMatrix(const RCP<const CuspCrsGraph<Ordinal,Node> > &graph,
00340                            const RCP<const CuspCrsMatrix<Scalar,Ordinal,Node> > &matrix);
00341 
00343 
00344 
00345 
00370     template <class DomainScalar, class RangeScalar>
00371     void
00372     multiply (Teuchos::ETransp trans,
00373               RangeScalar alpha,
00374               const MultiVector<DomainScalar,Node> &X,
00375               MultiVector<RangeScalar,Node> &Y) const;
00376 
00401     template <class DomainScalar, class RangeScalar>
00402     void
00403     multiply (Teuchos::ETransp trans,
00404               RangeScalar alpha,
00405               const MultiVector<DomainScalar,Node> &X,
00406               RangeScalar beta,
00407               MultiVector<RangeScalar,Node> &Y) const;
00408 
00435     template <class DomainScalar, class RangeScalar>
00436     void
00437     solve (Teuchos::ETransp trans,
00438            const MultiVector<DomainScalar,Node> &Y,
00439            MultiVector<RangeScalar,Node> &X) const {
00440       TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, "Kokkos::CuspOps does not support solve() because Cusp library does not provide sparse triangular solve.");
00441     }
00442 
00444 
00445   private:
00447     CuspOps(const CuspOps& source);
00448 
00450     RCP<Node> node_;
00451 
00452     Ordinal numRows_, numCols_, numNZ_;
00453     bool isInitialized_;
00454 
00455     // CSR data for A
00456     ArrayRCP<const Ordinal> rowPtrs_, colInds_;
00457     ArrayRCP<const Scalar>  rowVals_;
00458     // CSR data for transpose(A)
00459     ArrayRCP<const Ordinal> rowPtrs_t_, colInds_t_;
00460     ArrayRCP<const Scalar>  rowVals_t_;
00461   };
00462 
00463 
00464   // ======= matrix finalization ===========
00465   template <class Scalar, class Ordinal, class Node>
00466   void CuspOps<Scalar,Ordinal,Node>::finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag,
00467                                                    CuspCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params)
00468   {
00469     const Ordinal MAX_NNZ = Teuchos::OrdinalTraits<Ordinal>::max();
00470     const std::string prefix("finalizeGraph()");
00471     RCP<Node> node = graph.getNode();
00472     TEUCHOS_TEST_FOR_EXCEPTION(
00473         graph.isInitialized() == false,
00474         std::runtime_error, prefix << ": graph has not yet been initialized."
00475     )
00476     // diag: have to allocate and indicate
00477     ArrayRCP<Ordinal>       devinds, devptrs;
00478     ArrayRCP<const Ordinal> hostinds = graph.getIndices();
00479     ArrayRCP<const size_t> hostptrs = graph.getPointers();
00480     const Ordinal numRows = graph.getNumRows();
00481     // set description
00482     graph.setMatDesc(uplo,diag);
00483     const size_t numnz = hostinds.size();
00484     TEUCHOS_TEST_FOR_EXCEPTION( 
00485         numnz > (size_t)MAX_NNZ, std::runtime_error, 
00486         "Kokkos::CuspOps: Selected ordinal does not support more than " << MAX_NNZ << " non-zeros." 
00487         );
00488     devptrs = node->template allocBuffer<Ordinal>( numRows+1 );
00489     ArrayRCP<Ordinal> h_devptrs = node->viewBufferNonConst(WriteOnly, numRows+1, devptrs);
00490     std::copy( hostptrs.begin(), hostptrs.end(), h_devptrs.begin() );
00491     h_devptrs = null;
00492     if (numnz) {
00493       devinds = node->template allocBuffer<Ordinal>( numnz );
00494       node->copyToBuffer( numnz, hostinds(), devinds );
00495     }
00496     // set the data
00497     graph.setDeviceData(devptrs,devinds);
00498   }
00499 
00500   // ======= matrix finalization ===========
00501   template <class Scalar, class Ordinal, class Node>
00502   void CuspOps<Scalar,Ordinal,Node>::
00503   finalizeMatrix (const CuspCrsGraph<Ordinal,Node> &graph,
00504                         CuspCrsMatrix<Scalar,Ordinal,Node> &matrix,
00505                   const RCP<ParameterList> &params)
00506   {
00507     std::string FuncName("Kokkos::CuspOps::finalizeMatrix()");
00508     RCP<Node> node = graph.getNode();
00509     TEUCHOS_TEST_FOR_EXCEPTION(
00510         matrix.isInitialized() == false,
00511         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00512     )
00513     ArrayRCP<Scalar>        devvals;
00514     ArrayRCP<const size_t> hostptrs = graph.getPointers();
00515     ArrayRCP<const Scalar> hostvals = matrix.getValues();
00516     const Ordinal numRows = graph.getNumRows(),
00517                   numCols = graph.getNumCols();
00518     const Ordinal numnz = (Ordinal)hostptrs[numRows];
00519     if (numnz) {
00520       devvals = node->template allocBuffer<Scalar>( numnz );
00521       node->copyToBuffer( numnz,     hostvals(), devvals );
00522     }
00523     matrix.setDeviceData(devvals);
00524     if (params != null) {
00525       // warn? if (params->get("Prepare Solve",false))  {}
00526       // warn? if (params->get("Prepare Transpose Solve",false))  {}
00527       // warn? if (params->get("Prepare Conjugate Transpose Solve",false))  {}
00528 
00529       // explicitly create Transpose if true
00530       if (params->get("Prepare Transpose Multiply",false)) {
00531         ArrayRCP<Scalar>  tvals = node->template allocBuffer<Scalar>( numnz );
00532         ArrayRCP<Ordinal> tptrs = node->template allocBuffer<Ordinal>( numCols+1 ), 
00533                           tinds = node->template allocBuffer<Ordinal>( numnz );
00534         ArrayRCP<const Ordinal> ptrs = graph.getDevPointers();
00535         ArrayRCP<const Ordinal> inds = graph.getDevIndices();
00536         ArrayRCP<const Scalar>  vals = matrix.getDevValues();
00537         Cuspdetails::cuspCrsTranspose<Ordinal,Ordinal,Scalar>(numRows,numCols,numnz,
00538                                       ptrs.getRawPtr(),inds.getRawPtr(),vals.getRawPtr(),
00539                                       tptrs.getRawPtr(),tinds.getRawPtr(),tvals.getRawPtr());
00540         matrix.setDeviceDataTrans(tptrs,tinds,tvals);
00541       } 
00542       // if (params->get("Prepare Multiply",true))  {}            // delete non-transpose if false
00543     }
00544   }
00545 
00546   // ======= graph and matrix finalization ===========
00547   template <class Scalar, class Ordinal, class Node>
00548   void CuspOps<Scalar,Ordinal,Node>::finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag,
00549                                                             CuspCrsGraph<Ordinal,Node> &graph, 
00550                                                             CuspCrsMatrix<Scalar,Ordinal,Node> &matrix,
00551                                                             const RCP<ParameterList> &params)
00552   {
00553     std::string FuncName("Kokkos::CuspOps::finalizeGraphAndMatrix(graph,matrix,params)");
00554     TEUCHOS_TEST_FOR_EXCEPTION(
00555         graph.isInitialized() == false,
00556         std::runtime_error, FuncName << ": graph has not yet been initialized."
00557     )
00558     TEUCHOS_TEST_FOR_EXCEPTION(
00559         matrix.isInitialized() == false,
00560         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00561     )
00562     // no benefit to doing them together; do them separately
00563     finalizeGraph(uplo,diag,graph,params);
00564     finalizeMatrix(graph,matrix,params);
00565   }
00566 
00567 
00568   // ======= pointer allocation ===========
00569   template <class Scalar, class Ordinal, class Node>
00570   ArrayRCP<size_t>
00571   CuspOps<Scalar,Ordinal,Node>::allocRowPtrs(const RCP<Node> &/*node*/, const ArrayView<const size_t> &numEntriesPerRow)
00572   {
00573     // alloc page-locked ("pinned") memory on the host, specially allocated and specially deallocated
00574     CUDANodeHostPinnedDeallocator<size_t> dealloc;
00575     ArrayRCP<size_t> ptrs = dealloc.alloc(numEntriesPerRow.size() + 1);
00576     ptrs[0] = 0;
00577     std::partial_sum( numEntriesPerRow.getRawPtr(), numEntriesPerRow.getRawPtr()+numEntriesPerRow.size(), ptrs.begin()+1 );
00578     return ptrs;
00579   }
00580 
00581   // ======= other allocation ===========
00582   template <class Scalar, class Ordinal, class Node>
00583   template <class T>
00584   ArrayRCP<T>
00585   CuspOps<Scalar,Ordinal,Node>::allocStorage(const RCP<Node> &/*node*/, const ArrayView<const size_t> &rowPtrs)
00586   {
00587     // alloc page-locked ("pinned") memory on the host, specially allocated and specially deallocated
00588     const Ordinal totalNumEntries = *(rowPtrs.end()-1);
00589     CUDANodeHostPinnedDeallocator<T> dealloc;
00590     ArrayRCP<T> buf = dealloc.alloc(totalNumEntries);
00591     std::fill(buf.begin(), buf.end(), Teuchos::ScalarTraits<T>::zero() );
00592     return buf;
00593   }
00594 
00595   template<class Scalar, class Ordinal, class Node>
00596   CuspOps<Scalar,Ordinal,Node>::CuspOps(const RCP<Node> &node)
00597   : node_(node)
00598   , numRows_(0)
00599   , numCols_(0)
00600   , numNZ_(0)
00601   , isInitialized_(false)
00602   {
00603     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00604     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00605   }
00606 
00607   template<class Scalar, class Ordinal, class Node>
00608   CuspOps<Scalar,Ordinal,Node>::~CuspOps()
00609   { }
00610 
00611   template <class Scalar, class Ordinal, class Node>
00612   RCP<Node> CuspOps<Scalar,Ordinal,Node>::getNode() const {
00613     return node_;
00614   }
00615 
00616   template <class Scalar, class Ordinal, class Node>
00617   void CuspOps<Scalar,Ordinal,Node>::setGraphAndMatrix(const RCP<const CuspCrsGraph<Ordinal,Node> > &graph_in, 
00618                                                        const RCP<const CuspCrsMatrix<Scalar,Ordinal,Node> > &matrix_in)
00619   {
00620     std::string tfecfFuncName("setGraphAndMatrix(graph_in,matrix_in)");
00621     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00622         isInitialized_ == true,
00623         std::runtime_error, " operators already initialized.");
00624     // get data from the matrix
00625     numRows_ = graph_in->getNumRows();
00626     numCols_ = graph_in->getNumCols();
00627     rowPtrs_ = graph_in->getDevPointers();
00628     colInds_ = graph_in->getDevIndices();
00629     rowVals_ = matrix_in->getDevValues();
00630     matrix_in->getDeviceDataTrans(rowPtrs_t_, colInds_t_, rowVals_t_);
00631     numNZ_ = colInds_.size();
00632     isInitialized_ = true;
00633   }
00634 
00635   template <class Scalar, class Ordinal, class Node>
00636   template <class DomainScalar, class RangeScalar>
00637   void CuspOps<Scalar,Ordinal,Node>::multiply(Teuchos::ETransp trans,
00638                                           RangeScalar alpha,
00639                                           const MultiVector<DomainScalar,Node> &X,
00640                                                 MultiVector< RangeScalar,Node> &Y) const
00641   {
00642     // Cusp doesn't support mixed precision
00643     Teuchos::CompileTimeAssert<Teuchos::TypeTraits::is_same<DomainScalar,Scalar>::value == false ||
00644                                Teuchos::TypeTraits::is_same< RangeScalar,Scalar>::value == false > cta; (void)cta;
00645     //
00646     std::string tfecfFuncName("multiply(trans,alpha,X,Y)");
00647     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00648         isInitialized_ == false,
00649         std::runtime_error, "sparse operators have not been initialized with graph and matrix data; call setGraphAndMatrix() first.")
00650     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00651         trans == Teuchos::CONJ_TRANS,
00652         std::runtime_error, "no support for transpose multiplication in Cusp")
00653     // get pointers,stride from X and Y
00654     Ordinal stride_x = (Ordinal)X.getStride(),
00655             stride_y = (Ordinal)Y.getStride();
00656     const DomainScalar * data_x = X.getValues().getRawPtr();
00657     RangeScalar * data_y = Y.getValuesNonConst().getRawPtr();
00658     const Ordinal numMatRows = numRows_;
00659     const Ordinal numMatCols = numCols_;
00660     const Ordinal opRows     = (trans == Teuchos::NO_TRANS ? numMatRows : numMatCols);
00661     const Ordinal opCols     = (trans == Teuchos::NO_TRANS ? numMatCols : numMatRows);
00662     const Ordinal numRHS     = X.getNumCols();
00663     const Ordinal numNZ      = rowVals_.size();
00664     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00665         X.getNumCols() != Y.getNumCols(),
00666         std::runtime_error, "X and Y do not have the same number of column vectors.")
00667     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00668         (size_t)X.getNumRows() != (size_t)opCols,
00669         std::runtime_error, "Size of X is not congruous with dimensions of operator.")
00670     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00671         (size_t)Y.getNumRows() != (size_t)opRows,
00672         std::runtime_error, "Size of Y is not congruous with dimensions of operator.")
00673     if (trans == Teuchos::NO_TRANS) {
00674       Cuspdetails::cuspCrsMultiply(numMatRows, numMatCols, numNZ, rowPtrs_.getRawPtr(), colInds_.getRawPtr(), rowVals_.getRawPtr(), 
00675                                    numRHS, data_x, stride_x, data_y, stride_y);
00676     }
00677     else {
00678       Cuspdetails::cuspCrsMultiply(numMatCols, numMatRows, numNZ, rowPtrs_t_.getRawPtr(), colInds_t_.getRawPtr(), rowVals_t_.getRawPtr(), 
00679                                    numRHS, data_x, stride_x, data_y, stride_y);
00680     }
00681     if (alpha != Teuchos::ScalarTraits<RangeScalar>::one()) {
00682       DefaultArithmetic< MultiVector< RangeScalar,Node > > ::Scale(Y,alpha);
00683     }
00684   }
00685 
00686 
00687   template <class Scalar, class Ordinal, class Node>
00688   template <class DomainScalar, class RangeScalar>
00689   void CuspOps<Scalar,Ordinal,Node>::multiply(Teuchos::ETransp trans,
00690                                           RangeScalar alpha, const MultiVector<DomainScalar,Node> &X,
00691                                           RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const
00692   {
00693     // Cusp doesn't support mixed precision
00694     Teuchos::CompileTimeAssert<Teuchos::TypeTraits::is_same<DomainScalar,Scalar>::value == false ||
00695                                Teuchos::TypeTraits::is_same< RangeScalar,Scalar>::value == false > cta; (void)cta;
00696     //
00697     std::string tfecfFuncName("multiply(trans,alpha,X,beta,Y)");
00698     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00699         trans == Teuchos::CONJ_TRANS,
00700         std::runtime_error, "no support for transpose multiplication in Cusp")
00701     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00702         isInitialized_ == false,
00703         std::runtime_error, "sparse operators have not been initialized with graph and matrix data; call setGraphAndMatrix() first.")
00704     // get pointers,stride from X and Y
00705     Ordinal stride_x = (Ordinal)X.getStride(),
00706             stride_y = (Ordinal)Y.getStride();
00707     const DomainScalar * data_x = X.getValues().getRawPtr();
00708     RangeScalar * data_y = Y.getValuesNonConst().getRawPtr();
00709     const Ordinal numMatRows = numRows_;
00710     const Ordinal numMatCols = numCols_;
00711     const Ordinal opRows     = (trans == Teuchos::NO_TRANS ? numMatRows : numMatCols);
00712     const Ordinal opCols     = (trans == Teuchos::NO_TRANS ? numMatCols : numMatRows);
00713     const Ordinal numRHS     = X.getNumCols();
00714     const Ordinal numNZ      = rowVals_.size();
00715     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00716         X.getNumCols() != Y.getNumCols(),
00717         std::runtime_error, "X and Y do not have the same number of column vectors.")
00718     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00719         (size_t)X.getNumRows() != (size_t)opCols,
00720         std::runtime_error, "Size of X is not congruous with dimensions of operator.")
00721     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00722         (size_t)Y.getNumRows() != (size_t)opRows,
00723         std::runtime_error, "Size of Y is not congruous with dimensions of operator.")
00724     // y = alpha*A*x + beta*y
00725     if (beta == Teuchos::ScalarTraits<RangeScalar>::zero()) {
00726       // don't need temp storage
00727       if (trans == Teuchos::NO_TRANS) {
00728         Cuspdetails::cuspCrsMultiply(numMatRows, numMatCols, numNZ, rowPtrs_.getRawPtr(), colInds_.getRawPtr(), rowVals_.getRawPtr(),
00729                                      numRHS, data_x, stride_x, data_y, stride_y);
00730       }
00731       else {
00732         Cuspdetails::cuspCrsMultiply(numMatCols, numMatRows, numNZ, rowPtrs_t_.getRawPtr(), colInds_t_.getRawPtr(), rowVals_t_.getRawPtr(), 
00733                                      numRHS, data_x, stride_x, data_y, stride_y);
00734       }
00735       if (alpha != Teuchos::ScalarTraits<RangeScalar>::one()) {
00736         DefaultArithmetic< MultiVector< RangeScalar,Node > > ::Scale(Y,alpha);
00737       }
00738     }
00739     else {
00740       // allocate temp space
00741       typedef MultiVector<RangeScalar,Node> MV;
00742       ArrayRCP<RangeScalar> tmp = node_->template allocBuffer<RangeScalar>( opRows*numRHS );
00743       // tmp = A*X
00744       if (trans == Teuchos::NO_TRANS) {
00745         Cuspdetails::cuspCrsMultiply(numMatRows, numMatCols, numNZ, rowPtrs_.getRawPtr(), colInds_.getRawPtr(), rowVals_.getRawPtr(),
00746                                      numRHS, data_x, stride_x, tmp.getRawPtr(), opRows);
00747       }
00748       else {
00749         Cuspdetails::cuspCrsMultiply(numMatCols, numMatRows, numNZ, rowPtrs_t_.getRawPtr(), colInds_t_.getRawPtr(), rowVals_t_.getRawPtr(), 
00750                                      numRHS, data_x, stride_x, tmp.getRawPtr(), opRows);
00751       }
00752       // Y = alpha*tmp + beta*Y
00753       MV mvtmp(node_);
00754       mvtmp.initializeValues(opRows,numRHS,tmp,opRows);
00755       DefaultArithmetic<MV>::GESUM(Y,alpha,mvtmp,beta);
00756     }
00757   }
00758 
00759 } // namespace Kokkos
00760 
00761 #endif /* KOKKOS_CUSPOPS_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends