Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CUSPARSEOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_CUSPARSEOPS_HPP
00043 #define KOKKOS_CUSPARSEOPS_HPP
00044 
00045 #include <Teuchos_DataAccess.hpp>
00046 #include <Teuchos_CompileTimeAssert.hpp>
00047 #include <Teuchos_TypeTraits.hpp>
00048 #include <stdexcept>
00049 
00050 #include "Kokkos_ConfigDefs.hpp"
00051 #include "Kokkos_CrsMatrixBase.hpp"
00052 #include "Kokkos_CrsGraphBase.hpp"
00053 #include "Kokkos_CUDANodeUtils.hpp"
00054 
00055 #include "Kokkos_MultiVector.hpp"
00056 #include "Kokkos_NodeHelpers.hpp"
00057 #include "Kokkos_DefaultArithmetic.hpp"
00058 
00059 #include <cusparse_v2.h>
00060 
00061 namespace Teuchos {
00062   template<>
00063   class TypeNameTraits<cusparseHandle_t> {
00064   public:
00065     static std::string name() { return std::string("cusparseHandle_t"); }
00066     static std::string concreteName( const cusparseHandle_t& ) { return std::string("cusparseHandle_t"); }
00067   };
00068   template<>
00069   class TypeNameTraits<cusparseMatDescr_t> {
00070   public:
00071     static std::string name() { return std::string("cusparseMatDescr_t"); }
00072     static std::string concreteName( const cusparseMatDescr_t& ) { return std::string("cusparseMatDescr_t"); }
00073   };
00074   template<>
00075   class TypeNameTraits<cusparseSolveAnalysisInfo_t> {
00076   public:
00077     static std::string name() { return std::string("cusparseSolveAnalysisInfo_t"); }
00078     static std::string concreteName( const cusparseSolveAnalysisInfo_t& ) { return std::string("cusparseSolveAnalysisInfo_t"); }
00079   };
00080 }
00081 
00082 namespace Kokkos {
00083 
00084   namespace CUSPARSEdetails {
00085 
00086     class Session {
00087       public:
00089         static void init();
00090 
00092         static RCP<const cusparseHandle_t> getHandle();
00093 
00094       private:
00095         static RCP<cusparseHandle_t> session_handle_;
00096     };
00097 
00098     RCP<cusparseMatDescr_t>           createMatDescr();
00099     RCP<cusparseSolveAnalysisInfo_t>  createSolveAnalysisInfo();
00100 
00101     class CUSPARSESessionDestroyer {
00102     public:
00103       CUSPARSESessionDestroyer();
00104       void free(cusparseHandle_t *ptr);
00105     };
00106 
00107     class CUSPARSEMatDescDestroyer {
00108     public:
00109       CUSPARSEMatDescDestroyer();
00110       void free(cusparseMatDescr_t *ptr);
00111     };
00112 
00113     class CUSPARSESolveAnalysisDestroyer {
00114     public:
00115       CUSPARSESolveAnalysisDestroyer();
00116       void free(cusparseSolveAnalysisInfo_t *ptr);
00117     };
00118 
00119 
00120     template <typename T>
00121     struct CUSPARSEUnsupportedScalar
00122     {
00123       public:
00124       static inline T notSupported() { return T::this_type_is_not_supported_by_CUSPARSE(); }
00125     };
00126 
00127 
00128     template <class T>
00129     class CUSPARSETemplateAdaptors {
00130       public:
00131       //
00132       static inline cusparseStatus_t
00133       CSRMM(cusparseHandle_t handle, cusparseOperation_t transA,
00134             int m, int n, int k, int nnz, const T *alpha,
00135             const cusparseMatDescr_t descrA, const T *csrValA,
00136             const int *csrRowPtrA, const int *csrColIndA,
00137             const T *B, int ldb,
00138             const T *beta, T *C, int ldc)
00139       { return CUSPARSEUnsupportedScalar<T>::notSupported(); }
00140       //
00141       static inline cusparseStatus_t
00142       CSRSM_analysis(cusparseHandle_t handle, cusparseOperation_t transA,
00143                      int m, int nnz, const cusparseMatDescr_t descrA,
00144                      const T *csrValA, const int *csrRowPtrA,
00145                      const int *csrColIndA, cusparseSolveAnalysisInfo_t info)
00146       { return CUSPARSEUnsupportedScalar<T>::notSupported(); }
00147       //
00148       static inline cusparseStatus_t
00149       CSRSM_solve(cusparseHandle_t handle, cusparseOperation_t transA,
00150                   int m, int n, const T *alpha,
00151                   const cusparseMatDescr_t descrA,
00152                   const T *csrValA, const int *csrRowPtrA,
00153                   const int *csrColIndA, cusparseSolveAnalysisInfo_t info,
00154                   const T *X, int ldx,
00155                   T *Y, int ldy)
00156       { return CUSPARSEUnsupportedScalar<T>::notSupported(); }
00157     };
00158 
00159     // float support
00160 #ifdef HAVE_KOKKOS_CUDA_FLOAT
00161     template <>
00162     class CUSPARSETemplateAdaptors<float> {
00163       public:
00164       //
00165       static inline cusparseStatus_t
00166       CSRMM(cusparseHandle_t handle, cusparseOperation_t transA,
00167             int m, int n, int k, int nnz, const float *alpha,
00168             const cusparseMatDescr_t descrA, const float *csrValA,
00169             const int *csrRowPtrA, const int *csrColIndA,
00170             const float *B, int ldb,
00171             const float *beta, float *C, int ldc)
00172       { return cusparseScsrmm(handle,transA,m,n,k,nnz,
00173                               alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00174                               B,ldb,beta,C,ldc); }
00175       //
00176       static inline cusparseStatus_t
00177       CSRSM_analysis(cusparseHandle_t handle, cusparseOperation_t transA,
00178                      int m, int nnz, const cusparseMatDescr_t descrA,
00179                      const float *csrValA, const int *csrRowPtrA,
00180                      const int *csrColIndA, cusparseSolveAnalysisInfo_t info)
00181       { return cusparseScsrsm_analysis(handle,transA,m,nnz,descrA,
00182                                        csrValA,csrRowPtrA,csrColIndA,info); }
00183       //
00184       static inline cusparseStatus_t
00185       CSRSM_solve(cusparseHandle_t handle, cusparseOperation_t transA,
00186                   int m, int n, const float *alpha,
00187                   const cusparseMatDescr_t descrA,
00188                   const float *csrValA, const int *csrRowPtrA,
00189                   const int *csrColIndA, cusparseSolveAnalysisInfo_t info,
00190                   const float *X, int ldx,
00191                   float *Y, int ldy)
00192       { return cusparseScsrsm_solve(handle,transA,m,n,
00193                                     alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00194                                     info,X,ldx,Y,ldy); }
00195     };
00196 #endif
00197 
00198 #ifdef HAVE_KOKKOS_CUDA_DOUBLE
00199     // double support
00200     template <>
00201     class CUSPARSETemplateAdaptors<double> {
00202       public:
00203       //
00204       static inline cusparseStatus_t
00205       CSRMM(cusparseHandle_t handle, cusparseOperation_t transA,
00206             int m, int n, int k, int nnz, const double *alpha,
00207             const cusparseMatDescr_t descrA, const double *csrValA,
00208             const int *csrRowPtrA, const int *csrColIndA,
00209             const double *B, int ldb,
00210             const double *beta, double *C, int ldc)
00211       { return cusparseDcsrmm(handle,transA,m,n,k,nnz,
00212                               alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00213                               B,ldb,beta,C,ldc); }
00214       //
00215       static inline cusparseStatus_t
00216       CSRSM_analysis(cusparseHandle_t handle, cusparseOperation_t transA,
00217                      int m, int nnz, const cusparseMatDescr_t descrA,
00218                      const double *csrValA, const int *csrRowPtrA,
00219                      const int *csrColIndA, cusparseSolveAnalysisInfo_t info)
00220       { return cusparseDcsrsm_analysis(handle,transA,m,nnz,descrA,
00221                                        csrValA,csrRowPtrA,csrColIndA,info); }
00222       //
00223       static inline cusparseStatus_t
00224       CSRSM_solve(cusparseHandle_t handle, cusparseOperation_t transA,
00225                   int m, int n, const double *alpha,
00226                   const cusparseMatDescr_t descrA,
00227                   const double *csrValA, const int *csrRowPtrA,
00228                   const int *csrColIndA, cusparseSolveAnalysisInfo_t info,
00229                   const double *X, int ldx,
00230                   double *Y, int ldy)
00231       { return cusparseDcsrsm_solve(handle,transA,m,n,
00232                                     alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00233                                     info,X,ldx,Y,ldy); }
00234     };
00235 #endif
00236 
00237 #ifdef HAVE_KOKKOS_CUDA_COMPLEX_FLOAT
00238     // complex<float> support
00239     template <>
00240     class CUSPARSETemplateAdaptors<std::complex<float> > {
00241       public:
00242       //
00243       static inline cusparseStatus_t
00244       CSRMM(cusparseHandle_t handle, cusparseOperation_t transA,
00245             int m, int n, int k, int nnz, const cuComplex *alpha,
00246             const cusparseMatDescr_t descrA, const cuComplex *csrValA,
00247             const int *csrRowPtrA, const int *csrColIndA,
00248             const cuComplex *B, int ldb,
00249             const cuComplex *beta, cuComplex *C, int ldc)
00250       { return cusparseCcsrmm(handle,transA,m,n,k,nnz,
00251                               alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00252                               B,ldb,beta,C,ldc); }
00253       //
00254       static inline cusparseStatus_t
00255       CSRSM_analysis(cusparseHandle_t handle, cusparseOperation_t transA,
00256                      int m, int nnz, const cusparseMatDescr_t descrA,
00257                      const cuComplex *csrValA, const int *csrRowPtrA,
00258                      const int *csrColIndA, cusparseSolveAnalysisInfo_t info)
00259       { return cusparseCcsrsm_analysis(handle,transA,m,nnz,descrA,
00260                                        csrValA,csrRowPtrA,csrColIndA,info); }
00261       //
00262       static inline cusparseStatus_t
00263       CSRSM_solve(cusparseHandle_t handle, cusparseOperation_t transA,
00264                   int m, int n, const cuComplex *alpha,
00265                   const cusparseMatDescr_t descrA,
00266                   const cuComplex *csrValA, const int *csrRowPtrA,
00267                   const int *csrColIndA, cusparseSolveAnalysisInfo_t info,
00268                   const cuComplex *X, int ldx,
00269                   cuComplex *Y, int ldy)
00270       { return cusparseCcsrsm_solve(handle,transA,m,n,
00271                                     alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00272                                     info,X,ldx,Y,ldy); }
00273     };
00274 #endif
00275 
00276 #ifdef HAVE_KOKKOS_CUDA_COMPLEX_DOUBLE
00277     // complex<double> support
00278     template <>
00279     class CUSPARSETemplateAdaptors<std::complex<double> > {
00280       public:
00281       //
00282       static inline cusparseStatus_t
00283       CSRMM(cusparseHandle_t handle, cusparseOperation_t transA,
00284             int m, int n, int k, int nnz, const cuDoubleComplex *alpha,
00285             const cusparseMatDescr_t descrA, const cuDoubleComplex *csrValA,
00286             const int *csrRowPtrA, const int *csrColIndA,
00287             const cuDoubleComplex *B, int ldb,
00288             const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc)
00289       { return cusparseZcsrmm(handle,transA,m,n,k,nnz,
00290                               alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00291                               B,ldb,beta,C,ldc); }
00292       //
00293       static inline cusparseStatus_t
00294       CSRSM_analysis(cusparseHandle_t handle, cusparseOperation_t transA,
00295                      int m, int nnz, const cusparseMatDescr_t descrA,
00296                      const cuDoubleComplex *csrValA, const int *csrRowPtrA,
00297                      const int *csrColIndA, cusparseSolveAnalysisInfo_t info)
00298       { return cusparseZcsrsm_analysis(handle,transA,m,nnz,descrA,
00299                                        csrValA,csrRowPtrA,csrColIndA,info); }
00300       //
00301       static inline cusparseStatus_t
00302       CSRSM_solve(cusparseHandle_t handle, cusparseOperation_t transA,
00303                   int m, int n, const cuDoubleComplex *alpha,
00304                   const cusparseMatDescr_t descrA,
00305                   const cuDoubleComplex *csrValA, const int *csrRowPtrA,
00306                   const int *csrColIndA, cusparseSolveAnalysisInfo_t info,
00307                   const cuDoubleComplex *X, int ldx,
00308                   cuDoubleComplex *Y, int ldy)
00309       { return cusparseZcsrsm_solve(handle,transA,m,n,
00310                                     alpha,descrA,csrValA,csrRowPtrA,csrColIndA,
00311                                     info,X,ldx,Y,ldy); }
00312     };
00313 #endif
00314 
00315   } // end of namespace CUSPARSEdetails
00316 
00318 
00320   template <class Node>
00321   class CUSPARSECrsGraph : public CrsGraphBase<int,Node>
00322   {
00323     public:
00324       CUSPARSECrsGraph(size_t numRows, const RCP<Node> &node, const RCP<ParameterList> &params);
00325       bool isEmpty() const;
00326       void setStructure(const ArrayRCP<const size_t>  &ptrs,
00327                         const ArrayRCP<const int> &inds);
00328       void setDeviceData(const ArrayRCP<const int> &devptrs, const ArrayRCP<const int> &devinds);
00329       inline ArrayRCP<const size_t> getPointers() const;
00330       inline ArrayRCP<const int>    getIndices() const;
00331       inline ArrayRCP<const int>    getDevPointers() const;
00332       inline ArrayRCP<const int>    getDevIndices() const;
00333       inline bool isInitialized() const;
00334       void setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag);
00335       void getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const;
00336       RCP<cusparseMatDescr_t> getMatDesc() const;
00337     private:
00338       bool isInitialized_;
00339       bool isEmpty_;
00340       // cusparse matrix description handle
00341       RCP<cusparseMatDescr_t> matdescr_;
00342       Teuchos::EUplo uplo_;
00343       Teuchos::EDiag diag_;
00344       // graph data
00345       ArrayRCP<const size_t>  host_rowptrs_;
00346       ArrayRCP<const int>     dev_rowptrs_;
00347       ArrayRCP<const int>     host_colinds_, dev_colinds_;
00348       // TODO: add CSC data, for efficient tranpose multiply
00349   };
00350 
00352 
00354   template <class Scalar,
00355             class Node>
00356   class CUSPARSECrsMatrix : public CrsMatrixBase<Scalar,int,Node>
00357   {
00358     public:
00359       CUSPARSECrsMatrix(const RCP<const CUSPARSECrsGraph<Node> > &graph, const RCP<ParameterList> &params);
00360       void setValues(const ArrayRCP<const Scalar> &vals);
00361       void setDeviceData(const ArrayRCP<const Scalar> &devvals);
00362       inline ArrayRCP<const Scalar> getValues() const;
00363       inline ArrayRCP<const Scalar> getDevValues() const;
00364       inline bool isInitialized() const;
00365       void setAnalyses(const RCP<cusparseSolveAnalysisInfo_t> &analysisNoTrans,
00366                        const RCP<cusparseSolveAnalysisInfo_t> &analysisTrans,
00367                        const RCP<cusparseSolveAnalysisInfo_t> &analysisConjTrans);
00368       void getAnalyses(RCP<cusparseSolveAnalysisInfo_t> &analysisNoTrans,
00369                        RCP<cusparseSolveAnalysisInfo_t> &analysisTrans,
00370                        RCP<cusparseSolveAnalysisInfo_t> &analysisConjTrans) const;
00371     private:
00372       bool isInitialized_;
00373       // cusparse analysis handles
00374       RCP<cusparseSolveAnalysisInfo_t> analysisNoTrans_, analysisConjTrans_, analysisTrans_;
00375       // matrix data
00376       ArrayRCP<const Scalar> vals_, dev_vals_;
00377       // TODO: add CSC data, for efficient transpose multiply
00378   };
00379 
00380   template <class Node>
00381   CUSPARSECrsGraph<Node>::CUSPARSECrsGraph(size_t numRows, const RCP<Node> &node, const RCP<ParameterList> &params)
00382   : CrsGraphBase<int,Node>(numRows,node,params)
00383   , isInitialized_(false)
00384   , isEmpty_(false)
00385   {
00386     CUSPARSEdetails::Session::init();
00387     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00388     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00389   }
00390 
00391   template <class Node>
00392   bool CUSPARSECrsGraph<Node>::isEmpty() const
00393   { return isEmpty_; }
00394 
00395   template <class Node>
00396   void CUSPARSECrsGraph<Node>::setStructure(
00397                       const ArrayRCP<const size_t>  &ptrs,
00398                       const ArrayRCP<const int> &inds)
00399   {
00400     std::string tfecfFuncName("setStructure(ptrs,inds)");
00401     const size_t numrows = this->getNumRows();
00402     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00403         (size_t)ptrs.size() != numrows+1
00404         || ptrs[0] != 0
00405         || (size_t)inds.size() != ptrs[numrows],
00406         std::runtime_error, " graph data not coherent."
00407     )
00408     const size_t numEntries = ptrs[numrows];
00409     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00410         isInitialized_ == true,
00411         std::runtime_error, " matrix has already been initialized"
00412     )
00413     if (numrows == 0 || numEntries == 0) isEmpty_ = true;
00414     host_rowptrs_ = ptrs;
00415     host_colinds_ = inds;
00416     isInitialized_ = true;
00417   }
00418 
00419   template <class Node>
00420   ArrayRCP<const size_t> CUSPARSECrsGraph<Node>::getPointers() const
00421   { return host_rowptrs_; }
00422 
00423   template <class Node>
00424   ArrayRCP<const int> CUSPARSECrsGraph<Node>::getIndices() const
00425   { return host_colinds_; }
00426 
00427   template <class Node>
00428   ArrayRCP<const int> CUSPARSECrsGraph<Node>::getDevPointers() const
00429   { return dev_rowptrs_; }
00430 
00431   template <class Node>
00432   ArrayRCP<const int> CUSPARSECrsGraph<Node>::getDevIndices() const
00433   { return dev_colinds_; }
00434 
00435   template <class Node>
00436   void CUSPARSECrsGraph<Node>::setDeviceData(const ArrayRCP<const int> &devptrs,
00437                                              const ArrayRCP<const int>    &devinds)
00438   { dev_rowptrs_ = devptrs; dev_colinds_ = devinds; }
00439 
00440   template <class Node>
00441   bool CUSPARSECrsGraph<Node>::isInitialized() const
00442   { return isInitialized_; }
00443 
00444   template <class Node>
00445   void CUSPARSECrsGraph<Node>::setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag)
00446   {
00447     std::string tfecfFuncName("setMatDesc()");
00448     //
00449     uplo_ = uplo;
00450     diag_ = diag;
00451     //
00452     matdescr_ = CUSPARSEdetails::createMatDescr();
00453     cusparseStatus_t status = cusparseSetMatIndexBase(*matdescr_, CUSPARSE_INDEX_BASE_ZERO);
00454     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (index base).")
00455     // upper or lower
00456     if (uplo == Teuchos::UPPER_TRI) {
00457       status = cusparseSetMatFillMode(*matdescr_, CUSPARSE_FILL_MODE_UPPER);
00458       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (upper).")
00459       status = cusparseSetMatType(*matdescr_, CUSPARSE_MATRIX_TYPE_TRIANGULAR);
00460       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (triangular).")
00461     }
00462     else if (uplo == Teuchos::LOWER_TRI) {
00463       status = cusparseSetMatFillMode(*matdescr_, CUSPARSE_FILL_MODE_LOWER);
00464       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (lower).")
00465       status = cusparseSetMatType(*matdescr_, CUSPARSE_MATRIX_TYPE_TRIANGULAR);
00466       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (triangular).")
00467     }
00468     else {
00469       status = cusparseSetMatType(*matdescr_, CUSPARSE_MATRIX_TYPE_GENERAL);
00470       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (general).")
00471     }
00472     if (diag == Teuchos::UNIT_DIAG) {
00473       status = cusparseSetMatDiagType(*matdescr_, CUSPARSE_DIAG_TYPE_UNIT);
00474       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (unit).")
00475     }
00476     else {
00477       status = cusparseSetMatDiagType(*matdescr_, CUSPARSE_DIAG_TYPE_NON_UNIT);
00478       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(status != CUSPARSE_STATUS_SUCCESS, std::runtime_error, "error setting matrix descriptor (non-unit).")
00479     }
00480   }
00481 
00482   template <class Node>
00483   void CUSPARSECrsGraph<Node>::getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const
00484   {
00485     uplo = uplo_;
00486     diag = diag_;
00487   }
00488 
00489   template <class Node>
00490   RCP<cusparseMatDescr_t> CUSPARSECrsGraph<Node>::getMatDesc() const
00491   {
00492     return matdescr_;
00493   }
00494 
00495   template <class Scalar, class Node>
00496   CUSPARSECrsMatrix<Scalar,Node>::CUSPARSECrsMatrix(const RCP<const CUSPARSECrsGraph<Node> > &graph, const RCP<ParameterList> &params)
00497   : CrsMatrixBase<Scalar,int,Node>(graph,params)
00498   , isInitialized_(false)
00499   {
00500     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00501     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00502   }
00503 
00504   template <class Scalar, class Node>
00505   void CUSPARSECrsMatrix<Scalar,Node>::setValues(const ArrayRCP<const Scalar> &vals)
00506   {
00507     std::string tfecfFuncName("setValues(vals)");
00508     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00509         isInitialized_ == true,
00510         std::runtime_error, " matrix is already initialized."
00511     )
00512     vals_ = vals;
00513     isInitialized_ = true;
00514   }
00515 
00516   template <class Scalar, class Node>
00517   ArrayRCP<const Scalar> CUSPARSECrsMatrix<Scalar,Node>::getValues() const
00518   { return vals_; }
00519 
00520   template <class Scalar, class Node>
00521   bool CUSPARSECrsMatrix<Scalar,Node>::isInitialized() const
00522   { return isInitialized_; }
00523 
00524   template <class Scalar, class Node>
00525   ArrayRCP<const Scalar> CUSPARSECrsMatrix<Scalar,Node>::getDevValues() const
00526   { return dev_vals_; }
00527 
00528   template <class Scalar, class Node>
00529   void CUSPARSECrsMatrix<Scalar,Node>::setDeviceData(const ArrayRCP<const Scalar> &devvals)
00530   { dev_vals_ = devvals; }
00531 
00532   template <class Scalar, class Node>
00533   void CUSPARSECrsMatrix<Scalar,Node>::setAnalyses(const RCP<cusparseSolveAnalysisInfo_t> &analysisNoTrans,
00534                                                    const RCP<cusparseSolveAnalysisInfo_t> &analysisTrans,
00535                                                    const RCP<cusparseSolveAnalysisInfo_t> &analysisConjTrans)
00536   {
00537     analysisNoTrans_   = analysisNoTrans;
00538     analysisTrans_     = analysisTrans;
00539     analysisConjTrans_ = analysisConjTrans;
00540   }
00541 
00542   template <class Scalar, class Node>
00543   void CUSPARSECrsMatrix<Scalar,Node>::getAnalyses(RCP<cusparseSolveAnalysisInfo_t> &analysisNoTrans,
00544                                                    RCP<cusparseSolveAnalysisInfo_t> &analysisTrans,
00545                                                    RCP<cusparseSolveAnalysisInfo_t> &analysisConjTrans) const
00546   {
00547     analysisNoTrans   = analysisNoTrans_;
00548     analysisTrans     = analysisTrans_;
00549     analysisConjTrans = analysisConjTrans_;
00550   }
00551 
00559   template <class Scalar, class Node>
00560   class CUSPARSEOps {
00561   public:
00563 
00564 
00566     typedef Scalar  scalar_type;
00568     typedef int     ordinal_type;
00570     typedef Node    node_type;
00572     typedef CUSPARSEOps<Scalar,Node> sparse_ops_type;
00573 
00575     template <class O, class N>
00576     struct graph {
00577       typedef CUSPARSECrsGraph<N> graph_type;
00578     };
00579 
00581     template <class S, class O, class N>
00582     struct matrix {
00583       typedef CUSPARSECrsMatrix<S,N> matrix_type;
00584     };
00585 
00598     template <class S2>
00599     struct bind_scalar {
00600       typedef CUSPARSEOps<S2,Node> other_type;
00601     };
00602 
00604 
00605 
00606 
00608     CUSPARSEOps(const RCP<Node> &node);
00609 
00611     ~CUSPARSEOps();
00612 
00614 
00615 
00616 
00618     RCP<Node> getNode() const;
00619 
00621 
00623 
00624 
00626     static ArrayRCP<size_t> allocRowPtrs(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs);
00627 
00629     template <class T>
00630     static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &ptrs);
00631 
00633     static void finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, CUSPARSECrsGraph<Node> &graph, const RCP<ParameterList> &params);
00634 
00636     static void finalizeMatrix(const CUSPARSECrsGraph<Node> &graph, CUSPARSECrsMatrix<Scalar,Node> &matrix, const RCP<ParameterList> &params);
00637 
00639     static void finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, CUSPARSECrsGraph<Node> &graph, CUSPARSECrsMatrix<Scalar,Node> &matrix, const RCP<ParameterList> &params);
00640 
00642     void setGraphAndMatrix(const RCP<const CUSPARSECrsGraph<Node> > &graph,
00643                            const RCP<const CUSPARSECrsMatrix<Scalar,Node> > &matrix);
00644 
00646 
00647 
00648 
00673     template <class DomainScalar, class RangeScalar>
00674     void
00675     multiply (Teuchos::ETransp trans,
00676               RangeScalar alpha,
00677               const MultiVector<DomainScalar,Node> &X,
00678               MultiVector<RangeScalar,Node> &Y) const;
00679 
00704     template <class DomainScalar, class RangeScalar>
00705     void
00706     multiply (Teuchos::ETransp trans,
00707               RangeScalar alpha,
00708               const MultiVector<DomainScalar,Node> &X,
00709               RangeScalar beta,
00710               MultiVector<RangeScalar,Node> &Y) const;
00711 
00738     template <class DomainScalar, class RangeScalar>
00739     void
00740     solve (Teuchos::ETransp trans,
00741            const MultiVector<DomainScalar,Node> &Y,
00742            MultiVector<RangeScalar,Node> &X) const;
00743 
00745 
00746   private:
00748     CUSPARSEOps(const CUSPARSEOps& source);
00749 
00751     RCP<Node> node_;
00752 
00753     int numRows_, numNZ_;
00754     bool isInitialized_;
00755 
00756     ArrayRCP<const int> rowPtrs_, colInds_;
00757     ArrayRCP<const Scalar> rowVals_;
00758     RCP<cusparseMatDescr_t>          matdescr_;
00759     RCP<cusparseSolveAnalysisInfo_t> aiNoTrans_, aiTrans_, aiConjTrans_;
00760   };
00761 
00762 
00763   // ======= matrix finalization ===========
00764   template <class Scalar, class Node>
00765   void CUSPARSEOps<Scalar,Node>::finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag,
00766                                                CUSPARSECrsGraph<Node> &graph, const RCP<ParameterList> &params)
00767   {
00768     const std::string prefix("finalizeGraph()");
00769     RCP<Node> node = graph.getNode();
00770     TEUCHOS_TEST_FOR_EXCEPTION(
00771         graph.isInitialized() == false,
00772         std::runtime_error, prefix << ": graph has not yet been initialized."
00773     )
00774     // diag: have to allocate and indicate
00775     ArrayRCP<int>    devinds, devptrs;
00776     ArrayRCP<const int>    hostinds = graph.getIndices();
00777     ArrayRCP<const size_t> hostptrs = graph.getPointers();
00778     const size_t numRows = graph.getNumRows();
00779     // set description
00780     graph.setMatDesc(uplo,diag);
00781     // allocate and initialize data
00782     if (diag == Teuchos::UNIT_DIAG) {
00783       // cuSPARSE, unfortunately, always assumes that the diagonal
00784       // entries are present in the storage.  Therefore, this flag
00785       // only specifies whether they are considered or not; they are
00786       // assumed to be present, and neglecting them will result in
00787       // incorrect behavior (causing the adjacent entry to be
00788       // neglected instead).  Therefore, because our API doesn't give
00789       // us explicit diagonal entries if diag == Teuchos::UNIT_DIAG,
00790       // we must allocate space for them.
00791       const size_t numnz = hostinds.size() + numRows;
00792       devptrs = node->template allocBuffer<int>( numRows+1 );
00793       if (numnz) devinds = node->template allocBuffer<int>( numnz );
00794       ArrayRCP<int> h_devptrs = node->viewBufferNonConst(WriteOnly, numRows+1, devptrs);
00795       ArrayRCP<int> h_devinds = node->viewBufferNonConst(WriteOnly, numnz,     devinds);
00796       for (size_t r=0; r < numRows; ++r) {
00797         h_devptrs[r] = (int)(hostptrs[r]+r);
00798         if (uplo == Teuchos::LOWER_TRI) {
00799           // leave one space at the end
00800           std::copy (hostinds.begin()+hostptrs[r],
00801                      hostinds.begin()+hostptrs[r+1],
00802                      h_devinds.begin()+h_devptrs[r]);
00803         }
00804         else {
00805           // leave one space at the beginning
00806           std::copy (hostinds.begin()+hostptrs[r],
00807                      hostinds.begin()+hostptrs[r+1],
00808                      h_devinds.begin()+h_devptrs[r]+1);
00809         }
00810       }
00811       h_devptrs[numRows] = (int)(hostptrs[numRows]+numRows);
00812       // copy back
00813       h_devptrs = null;
00814       h_devinds = null;
00815     }
00816     else {
00817       // our format == their format; just allocate and copy
00818       const size_t numnz = hostinds.size();
00819       devptrs = node->template allocBuffer<int>( numRows+1 );
00820       ArrayRCP<int> h_devptrs = node->viewBufferNonConst(WriteOnly, numRows+1, devptrs);
00821       // have to copy on host because of the cast from size_t to int
00822       std::copy(hostptrs.begin(), hostptrs.end(), h_devptrs.begin());
00823       h_devptrs = null;
00824       if (numnz) {
00825         devinds = node->template allocBuffer<int>( numnz );
00826         node->copyToBuffer( numnz,     hostinds(), devinds );
00827       }
00828     }
00829     // set the data
00830     graph.setDeviceData(devptrs,devinds);
00831   }
00832 
00833   // ======= matrix finalization ===========
00834   template <class Scalar, class Node>
00835   void CUSPARSEOps<Scalar,Node>::
00836   finalizeMatrix (const CUSPARSECrsGraph<Node> &graph,
00837                   CUSPARSECrsMatrix<Scalar,Node> &matrix,
00838                   const RCP<ParameterList> &params)
00839   {
00840     std::string FuncName("Kokkos::CUSPARSEOps::finalizeMatrix()");
00841     RCP<Node> node = graph.getNode();
00842     TEUCHOS_TEST_FOR_EXCEPTION(
00843         matrix.isInitialized() == false,
00844         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00845     )
00846     // diag: have to allocate and indicate
00847     Teuchos::EUplo uplo;
00848     Teuchos::EDiag diag;
00849     graph.getMatDesc(uplo,diag);
00850     ArrayRCP<Scalar>       devvals;
00851     ArrayRCP<const size_t> hostptrs = graph.getPointers();
00852     ArrayRCP<const Scalar> hostvals = matrix.getValues();
00853     const size_t numRows = graph.getNumRows();
00854     if (diag == Teuchos::UNIT_DIAG) {
00855       // cuSPARSE, unfortunately, always assumes that the diagonal
00856       // entries are present in the storage.  Therefore, this flag
00857       // only specifies whether they are considered or not; they are
00858       // assumed to be present, and neglecting them will result in
00859       // incorrect behavior (causing the adjacent entry to be
00860       // neglected instead).  Therefore, because our API doesn't give
00861       // us diagonal entries if diag == Teuchos::UNIT_DIAG, we must
00862       // allocate space for them.
00863       const size_t numnz = hostptrs[numRows] + numRows;
00864       devvals = node->template allocBuffer<Scalar>( numnz );
00865       ArrayRCP<Scalar> h_devvals = node->viewBufferNonConst(WriteOnly, numnz, devvals);
00866       for (size_t r=0; r < numRows; ++r) {
00867         if (uplo == Teuchos::LOWER_TRI) {
00868           // leave one space at the end
00869           std::copy( hostvals.begin()+hostptrs[r], hostvals.begin()+hostptrs[r+1], h_devvals.begin()+hostptrs[r]+r );
00870         }
00871         else {
00872           // leave one space at the beginning
00873           std::copy( hostvals.begin()+hostptrs[r], hostvals.begin()+hostptrs[r+1], h_devvals.begin()+hostptrs[r]+r+1 );
00874         }
00875       }
00876       // copy back
00877       h_devvals = null;
00878     }
00879     else {
00880       const size_t numnz = hostptrs[numRows];
00881       if (numnz) {
00882         devvals = node->template allocBuffer<Scalar>( numnz );
00883         node->copyToBuffer( numnz,     hostvals(), devvals );
00884       }
00885     }
00886     matrix.setDeviceData(devvals);
00887 
00888     const int numnz = (int)devvals.size();
00889     RCP<cusparseMatDescr_t> descr = graph.getMatDesc();
00890     ArrayRCP<const int> devptrs = graph.getDevPointers(),
00891                         devinds = graph.getDevIndices();
00892 
00893     RCP<const cusparseHandle_t> hndl = CUSPARSEdetails::Session::getHandle();
00894     // look at the parameter list and do any analyses requested for solves
00895     RCP<cusparseSolveAnalysisInfo_t> ai_non, ai_trans, ai_conj;
00896     if (params != null && params->get("Prepare Solve",false)) {
00897       ai_non = CUSPARSEdetails::createSolveAnalysisInfo();
00898       cusparseStatus_t stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRSM_analysis(
00899           *hndl, CUSPARSE_OPERATION_NON_TRANSPOSE,numRows,
00900           numnz, *descr, devvals.getRawPtr(), devptrs.getRawPtr(), devinds.getRawPtr(),
00901           *ai_non);
00902       TEUCHOS_TEST_FOR_EXCEPTION(stat != CUSPARSE_STATUS_SUCCESS, std::runtime_error,
00903           FuncName << ": CSRSM_analysis(non-trans) returned error " << stat);
00904     }
00905     if (params != null && params->get("Prepare Tranpose Solve",false)) {
00906       ai_trans = CUSPARSEdetails::createSolveAnalysisInfo();
00907       cusparseStatus_t stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRSM_analysis(
00908           *hndl, CUSPARSE_OPERATION_TRANSPOSE,numRows,
00909           numnz, *descr, devvals.getRawPtr(), devptrs.getRawPtr(), devinds.getRawPtr(),
00910           *ai_trans);
00911       TEUCHOS_TEST_FOR_EXCEPTION(stat != CUSPARSE_STATUS_SUCCESS, std::runtime_error,
00912           FuncName << ": CSRSM_analysis(trans) returned error " << stat);
00913     }
00914     if (params != null && params->get("Prepare Conjugate Tranpose Solve",false)) {
00915       ai_conj = CUSPARSEdetails::createSolveAnalysisInfo();
00916       cusparseStatus_t stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRSM_analysis(
00917           *hndl, CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE,numRows,
00918           numnz, *descr, devvals.getRawPtr(), devptrs.getRawPtr(), devinds.getRawPtr(),
00919           *ai_conj);
00920       TEUCHOS_TEST_FOR_EXCEPTION(stat != CUSPARSE_STATUS_SUCCESS, std::runtime_error,
00921           FuncName << ": CSRSM_analysis(conj-trans) returned error " << stat);
00922     }
00923     matrix.setAnalyses( ai_non, ai_trans, ai_conj );
00924     //
00925     if (params != null && params->get("Prepare Transpose Multiply",false)) {
00926       // finish: compute CSC for transpose
00927       TEUCHOS_TEST_FOR_EXCEPT(true);
00928     }
00929   }
00930 
00931   // ======= graph and matrix finalization ===========
00932   template <class Scalar, class Node>
00933   void CUSPARSEOps<Scalar,Node>::finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag,
00934                                                         CUSPARSECrsGraph<Node> &graph, CUSPARSECrsMatrix<Scalar,Node> &matrix,
00935                                                         const RCP<ParameterList> &params)
00936   {
00937     std::string FuncName("Kokkos::CUSPARSEOps::finalizeGraphAndMatrix(graph,matrix,params)");
00938     TEUCHOS_TEST_FOR_EXCEPTION(
00939         graph.isInitialized() == false,
00940         std::runtime_error, FuncName << ": graph has not yet been initialized."
00941     )
00942     TEUCHOS_TEST_FOR_EXCEPTION(
00943         matrix.isInitialized() == false,
00944         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00945     )
00946     // no benefit to doing them together; do them separately
00947     finalizeGraph(uplo,diag,graph,params);
00948     finalizeMatrix(graph,matrix,params);
00949   }
00950 
00951 
00952   // ======= pointer allocation ===========
00953   template <class Scalar, class Node>
00954   ArrayRCP<size_t>
00955   CUSPARSEOps<Scalar,Node>::allocRowPtrs(const RCP<Node> &/*node*/, const ArrayView<const size_t> &numEntriesPerRow)
00956   {
00957     // alloc page-locked ("pinned") memory on the host, specially allocated and specially deallocated
00958     CUDANodeHostPinnedDeallocator<size_t> dealloc;
00959     ArrayRCP<size_t> ptrs = dealloc.alloc(numEntriesPerRow.size() + 1);
00960     ptrs[0] = 0;
00961     std::partial_sum( numEntriesPerRow.getRawPtr(), numEntriesPerRow.getRawPtr()+numEntriesPerRow.size(), ptrs.begin()+1 );
00962     return ptrs;
00963   }
00964 
00965   // ======= other allocation ===========
00966   template <class Scalar, class Node>
00967   template <class T>
00968   ArrayRCP<T>
00969   CUSPARSEOps<Scalar,Node>::allocStorage(const RCP<Node> &/*node*/, const ArrayView<const size_t> &rowPtrs)
00970   {
00971     // alloc page-locked ("pinned") memory on the host, specially allocated and specially deallocated
00972     const size_t totalNumEntries = *(rowPtrs.end()-1);
00973     CUDANodeHostPinnedDeallocator<T> dealloc;
00974     ArrayRCP<T> buf = dealloc.alloc(totalNumEntries);
00975     std::fill(buf.begin(), buf.end(), Teuchos::ScalarTraits<T>::zero() );
00976     return buf;
00977   }
00978   template<class Scalar, class Node>
00979   CUSPARSEOps<Scalar,Node>::CUSPARSEOps(const RCP<Node> &node)
00980   : node_(node)
00981   , numRows_(0)
00982   , numNZ_(0)
00983   , isInitialized_(false)
00984   {
00985     CUSPARSEdetails::Session::init();
00986     // Make sure that users only specialize for Kokkos Node types that are CUDA Nodes
00987     Teuchos::CompileTimeAssert<Node::isCUDANode == false> cta; (void)cta;
00988   }
00989 
00990   template<class Scalar, class Node>
00991   CUSPARSEOps<Scalar,Node>::~CUSPARSEOps()
00992   { }
00993 
00994   template <class Scalar, class Node>
00995   RCP<Node> CUSPARSEOps<Scalar,Node>::getNode() const {
00996     return node_;
00997   }
00998 
00999   template <class Scalar, class Node>
01000   void CUSPARSEOps<Scalar,Node>::setGraphAndMatrix(const RCP<const CUSPARSECrsGraph<Node> > &graph, const RCP<const CUSPARSECrsMatrix<Scalar,Node> > &matrix)
01001   {
01002     std::string tfecfFuncName("setGraphAndMatrix(graph,matrix)");
01003     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01004         isInitialized_ == true,
01005         std::runtime_error, " operators already initialized.");
01006     // get cusparse data from the matrix
01007     numRows_ = graph->getNumRows();
01008     matdescr_ = graph->getMatDesc();
01009     rowPtrs_ = graph->getDevPointers();
01010     colInds_ = graph->getDevIndices();
01011     rowVals_ = matrix->getDevValues();
01012     numNZ_ = colInds_.size();
01013     matrix->getAnalyses( aiNoTrans_, aiTrans_, aiConjTrans_ );
01014     isInitialized_ = true;
01015   }
01016 
01017   template <class Scalar, class Node>
01018   template <class DomainScalar, class RangeScalar>
01019   void CUSPARSEOps<Scalar,Node>::solve(Teuchos::ETransp trans,
01020                                        const MultiVector<DomainScalar,Node> &Y,
01021                                              MultiVector< RangeScalar,Node> &X) const
01022   {
01023     // CUSPARSE doesn't support mixed precision; partial specialize, then nix the generic versions
01024     Teuchos::CompileTimeAssert<Teuchos::TypeTraits::is_same<DomainScalar,Scalar>::value == false ||
01025                                Teuchos::TypeTraits::is_same< RangeScalar,Scalar>::value == false > cta; (void)cta;
01026     //
01027     std::string tfecfFuncName("solve()");
01028     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01029         isInitialized_ == false,
01030         std::runtime_error, "sparse operators have not been initialized with graph and matrix data; call setGraphAndMatrix() first.")
01031     // get pointers,stride from X and Y
01032     int stride_x = (int)X.getStride(),
01033         stride_y = (int)Y.getStride();
01034     const Scalar * data_y = Y.getValues().getRawPtr();
01035     Scalar * data_x = X.getValuesNonConst().getRawPtr();
01036     const int numMatRows = X.getNumRows();
01037     const int numRHS     = X.getNumCols();
01038     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01039         X.getNumRows() != Y.getNumRows(),
01040         std::runtime_error, "X and Y do not have the same number of row vectors.")
01041     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01042         X.getNumCols() != Y.getNumCols(),
01043         std::runtime_error, "X and Y do not have the same number of column vectors.")
01044     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01045         Y.getNumRows() != numRows_,
01046         std::runtime_error, "Y does not have the same number of rows as does the matrix.")
01047     //
01048     RCP<cusparseSolveAnalysisInfo_t> solveInfo;
01049     cusparseOperation_t                     op;
01050     if (trans == Teuchos::NO_TRANS) {
01051       solveInfo = aiNoTrans_;
01052       op = CUSPARSE_OPERATION_NON_TRANSPOSE;
01053     }
01054     else if (trans == Teuchos::TRANS) {
01055       solveInfo = aiTrans_;
01056       op = CUSPARSE_OPERATION_TRANSPOSE;
01057     }
01058     else if (trans == Teuchos::CONJ_TRANS) {
01059       solveInfo = aiConjTrans_;
01060       op = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;
01061     }
01062     else {
01063       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01064           true, std::runtime_error, "invalid transformation (none ofe of NO_TRANS, TRANS or CONJ_TRANS): " << trans);
01065     }
01066     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01067       solveInfo == null, std::runtime_error,
01068       "solve info not computed at matrix finalization time for requested transformation: " << trans);
01069     RCP<const cusparseHandle_t> hndl = CUSPARSEdetails::Session::getHandle();
01070     const Scalar s_alpha = Teuchos::ScalarTraits<Scalar>::one();
01071     cusparseStatus_t stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRSM_solve(
01072         *hndl, op, numMatRows, numRHS, &s_alpha,
01073         *matdescr_, rowVals_.getRawPtr(), rowPtrs_.getRawPtr(), colInds_.getRawPtr(), *solveInfo,
01074         data_y, stride_y, data_x, stride_x);
01075     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01076         stat != CUSPARSE_STATUS_SUCCESS,
01077         std::runtime_error, ": CSRSM_solve returned error " << stat);
01078     return;
01079   }
01080 
01081 
01082   template <class Scalar, class Node>
01083   template <class DomainScalar, class RangeScalar>
01084   void CUSPARSEOps<Scalar,Node>::multiply(Teuchos::ETransp trans,
01085                                           RangeScalar alpha,
01086                                           const MultiVector<DomainScalar,Node> &X,
01087                                                 MultiVector< RangeScalar,Node> &Y) const
01088   {
01089     // CUSPARSE doesn't support mixed precision; partial specialize, then nix the generic versions
01090     Teuchos::CompileTimeAssert<Teuchos::TypeTraits::is_same<DomainScalar,Scalar>::value == false ||
01091                                Teuchos::TypeTraits::is_same< RangeScalar,Scalar>::value == false > cta; (void)cta;
01092     //
01093     std::string tfecfFuncName("multiply(trans,alpha,X,Y)");
01094     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01095         isInitialized_ == false,
01096         std::runtime_error, "sparse operators have not been initialized with graph and matrix data; call setGraphAndMatrix() first.")
01097     // get pointers,stride from X and Y
01098     int stride_x = (int)X.getStride(),
01099         stride_y = (int)Y.getStride();
01100     const Scalar * data_x = X.getValues().getRawPtr();
01101     Scalar * data_y = Y.getValuesNonConst().getRawPtr();
01102     const int numMatRows = Y.getNumRows();
01103     const int numMatCols = X.getNumRows();
01104     const int numRHS     = X.getNumCols();
01105     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01106         X.getNumCols() != Y.getNumCols(),
01107         std::runtime_error, "X and Y do not have the same number of column vectors.")
01108     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01109         Y.getNumRows() != (size_t)numRows_,
01110         std::runtime_error, "Y does not have the same number of rows as does the matrix.")
01111     // call mat-vec
01112     cusparseOperation_t op;
01113     if      (trans == Teuchos::NO_TRANS)   op = CUSPARSE_OPERATION_NON_TRANSPOSE;
01114     else if (trans == Teuchos::TRANS)      op = CUSPARSE_OPERATION_TRANSPOSE;
01115     else if (trans == Teuchos::CONJ_TRANS) op = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;
01116     RCP<const cusparseHandle_t> sess = CUSPARSEdetails::Session::getHandle();
01117     const Scalar s_alpha = (Scalar)alpha,
01118                  s_beta  = Teuchos::ScalarTraits<Scalar>::zero();
01119 #ifdef HAVE_KOKKOS_DEBUG
01120     cudaError_t err = cudaGetLastError();
01121     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01122         ": cudaGetLastError() returned error after function call:\n"
01123         << cudaGetErrorString(err) );
01124     err = cudaThreadSynchronize();
01125     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01126         ": cudaThreadSynchronize() returned error after function call:\n"
01127         << cudaGetErrorString(err) );
01128 #endif
01129     cusparseStatus_t stat;
01130     stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRMM(
01131         *sess, op, numMatRows, numRHS, numMatCols, numNZ_, &s_alpha,
01132         *matdescr_, rowVals_.getRawPtr(), rowPtrs_.getRawPtr(), colInds_.getRawPtr(),
01133         data_x, stride_x, &s_beta, data_y, stride_y);
01134 #ifdef HAVE_KOKKOS_DEBUG
01135     err = cudaGetLastError();
01136     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01137         ": cudaGetLastError() returned error after function call:\n"
01138         << cudaGetErrorString(err) );
01139     err = cudaThreadSynchronize();
01140     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01141         ": cudaThreadSynchronize() returned error after function call:\n"
01142         << cudaGetErrorString(err) );
01143 #endif
01144     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01145         stat != CUSPARSE_STATUS_SUCCESS,
01146         std::runtime_error, ": CSRMM returned error " << stat);
01147     return;
01148   }
01149 
01150 
01151   template <class Scalar, class Node>
01152   template <class DomainScalar, class RangeScalar>
01153   void CUSPARSEOps<Scalar,Node>::multiply(Teuchos::ETransp trans,
01154                                           RangeScalar alpha, const MultiVector<DomainScalar,Node> &X,
01155                                           RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const
01156   {
01157     // CUSPARSE doesn't support mixed precision; partial specialize, then nix the generic versions
01158     Teuchos::CompileTimeAssert<Teuchos::TypeTraits::is_same<DomainScalar,Scalar>::value == false ||
01159                                Teuchos::TypeTraits::is_same< RangeScalar,Scalar>::value == false > cta; (void)cta;
01160     //
01161     std::string tfecfFuncName("multiply(trans,alpha,X,beta,Y)");
01162     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01163         isInitialized_ == false,
01164         std::runtime_error, "sparse operators have not been initialized with graph and matrix data; call setGraphAndMatrix() first.")
01165     // get pointers,stride from X and Y
01166     int stride_x = (int)X.getStride(),
01167         stride_y = (int)Y.getStride();
01168     const Scalar * data_x = X.getValues().getRawPtr();
01169     Scalar * data_y = Y.getValuesNonConst().getRawPtr();
01170     const int numMatRows = Y.getNumRows();
01171     const int numMatCols = X.getNumRows();
01172     const int numRHS     = X.getNumCols();
01173     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01174         X.getNumCols() != Y.getNumCols(),
01175         std::runtime_error, "X and Y do not have the same number of column vectors.")
01176     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01177         Y.getNumRows() != (size_t)numRows_,
01178         std::runtime_error, "Y does not have the same number of rows as does the matrix.")
01179     // call mat-vec
01180     cusparseOperation_t op;
01181     RCP<const cusparseHandle_t> hndl = CUSPARSEdetails::Session::getHandle();
01182     if      (trans == Teuchos::NO_TRANS)   op = CUSPARSE_OPERATION_NON_TRANSPOSE;
01183     else if (trans == Teuchos::TRANS)      op = CUSPARSE_OPERATION_TRANSPOSE;
01184     else if (trans == Teuchos::CONJ_TRANS) op = CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE;
01185     const Scalar s_alpha = (Scalar)alpha,
01186                  s_beta  = (Scalar)beta;
01187     cusparseStatus_t stat = CUSPARSEdetails::CUSPARSETemplateAdaptors<Scalar>::CSRMM(
01188         *hndl, op, numMatRows, numRHS, numMatCols, numNZ_, &s_alpha,
01189         *matdescr_, rowVals_.getRawPtr(), rowPtrs_.getRawPtr(), colInds_.getRawPtr(),
01190         data_x, stride_x, &s_beta, data_y, stride_y);
01191 #ifdef HAVE_KOKKOS_DEBUG
01192     cudaError_t err = cudaGetLastError();
01193     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01194         ": cudaGetLastError() returned error after function call:\n"
01195         << cudaGetErrorString(err) );
01196     err = cudaThreadSynchronize();
01197     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( cudaSuccess != err, std::runtime_error,
01198         ": cudaThreadSynchronize() returned error after function call:\n"
01199         << cudaGetErrorString(err) );
01200 #endif
01201     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01202         stat != CUSPARSE_STATUS_SUCCESS,
01203         std::runtime_error, ": CSRMM returned error " << stat);
01204     return;
01205   }
01206 
01207 
01208 
01209 } // namespace Kokkos
01210 
01211 #endif /* KOKKOS_CUSPARSEOPS_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends