Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultBlockSparseOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_DEFAULTBLOCKSPARSEOPS_HPP
00030 #define KOKKOS_DEFAULTBLOCKSPARSEOPS_HPP
00031 
00032 #include <Teuchos_ArrayRCP.hpp>
00033 #include <Teuchos_DataAccess.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_TypeNameTraits.hpp>
00036 #include <Teuchos_BLAS_types.hpp>
00037 #include <stdexcept>
00038 
00039 #include "Kokkos_ConfigDefs.hpp"
00040 #include "Kokkos_VbrMatrix.hpp" 
00041 #include "Kokkos_MultiVector.hpp"
00042 #include "Kokkos_NodeHelpers.hpp"
00043 #include "Kokkos_DefaultArithmetic.hpp"
00044 #include "Kokkos_DefaultBlockSparseKernelOps.hpp"
00045 
00046 namespace Kokkos {
00047 
00048   // default implementation
00049   template <class Scalar, class Ordinal, class Node = DefaultNode::DefaultNodeType>
00053   class DefaultBlockSparseOps {
00054   public:
00055     typedef Scalar  ScalarType;
00056     typedef Ordinal OrdinalType;
00057     typedef Node    NodeType;
00058 
00060 
00062 
00064     DefaultBlockSparseOps(const RCP<Node> &node = DefaultNode::getDefaultNode());
00065 
00067     ~DefaultBlockSparseOps();
00068 
00070 
00072 
00073     
00075     RCP<Node> getNode() const;
00076 
00078 
00080 
00082 
00084     void initializeValues(const VbrMatrix<Scalar,Ordinal,Node> &matrix);
00085 
00087     void clear();
00088 
00090 
00092 
00094 
00096     template <class DomainScalar, class RangeScalar>
00097     void multiply(Teuchos::ETransp trans, RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, MultiVector<RangeScalar,Node> &Y) const;
00098 
00100     template <class DomainScalar, class RangeScalar>
00101     void multiply(Teuchos::ETransp trans, 
00102                   RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const;
00103 
00105     template <class DomainScalar, class RangeScalar>
00106     void solve(Teuchos::ETransp trans, Teuchos::EUplo triang, Teuchos::EDiag diag,
00107             const MultiVector<DomainScalar,Node>& Y, MultiVector<RangeScalar,Node>& X) const;
00109 
00110   protected:
00112     DefaultBlockSparseOps(const DefaultBlockSparseOps& source);
00113 
00114     RCP<Node> node_;
00115 
00116     ArrayRCP<const Ordinal> pbuf_rptr_;
00117     ArrayRCP<const Ordinal> pbuf_cptr_;
00118     ArrayRCP<const size_t> pbuf_bptr_;
00119     ArrayRCP<const Ordinal> pbuf_bindx_;
00120     ArrayRCP<const Ordinal> pbuf_indx_;
00121     ArrayRCP<const Scalar>  pbuf_vals1D_;
00122 
00123     size_t numBlockRows_;
00124     bool valsInit_, isPacked_, isEmpty_;
00125   };
00126 
00127   template<class Scalar, class Ordinal, class Node>
00128   DefaultBlockSparseOps<Scalar,Ordinal,Node>::DefaultBlockSparseOps(const RCP<Node> &node)
00129   : node_(node)
00130   , valsInit_(false)
00131   , isPacked_(false)
00132   , isEmpty_(false) {
00133   }
00134 
00135   template<class Scalar, class Ordinal, class Node>
00136   DefaultBlockSparseOps<Scalar,Ordinal,Node>::~DefaultBlockSparseOps() {
00137   }
00138 
00139   template <class Scalar, class Ordinal, class Node>
00140   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::initializeValues(const VbrMatrix<Scalar,Ordinal,Node> &matrix) {
00141     isEmpty_ = false;
00142     pbuf_vals1D_ = matrix.get_values();
00143     pbuf_rptr_ = matrix.get_rptr();
00144     pbuf_cptr_ = matrix.get_cptr();
00145     pbuf_bptr_ = matrix.get_bptr();
00146     pbuf_bindx_ = matrix.get_bindx();
00147     pbuf_indx_ = matrix.get_indx();
00148     numBlockRows_ = matrix.getNumBlockRows();
00149     valsInit_ = true;
00150     isPacked_ = true;
00151   }
00152 
00153 
00154   template <class Scalar, class Ordinal, class Node>
00155   RCP<Node> DefaultBlockSparseOps<Scalar,Ordinal,Node>::getNode() const { 
00156     return node_; 
00157   }
00158 
00159 
00160   template <class Scalar, class Ordinal, class Node>
00161   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::clear() {
00162     pbuf_vals1D_  = null;
00163     pbuf_rptr_    = null;
00164     pbuf_cptr_    = null;
00165     pbuf_bptr_    = null;
00166     pbuf_bindx_   = null;
00167     pbuf_indx_    = null;
00168     valsInit_ = false;
00169     isPacked_ = false;
00170     isEmpty_  = false;
00171   }
00172 
00173   template <class Scalar, class Ordinal, class Node>
00174   template <class DomainScalar, class RangeScalar>
00175   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::multiply(
00176                                 Teuchos::ETransp trans, 
00177                                 RangeScalar alpha,
00178                                 const MultiVector<DomainScalar,Node> &X, 
00179                                 MultiVector<RangeScalar,Node> &Y) const {
00180     typedef DefaultBlockSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1;
00181     typedef DefaultBlockSparseMultiplyOp1Transpose<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1T;
00182     TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00183         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00184     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00185     ReadyBufferHelper<Node> rbh(node_);
00186     if (isEmpty_ == true) {
00187       // Y <= 0 * X
00188       //   <= 0
00189       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00190     }
00191     else if (isPacked_ == true) {
00192       if (trans == Teuchos::NO_TRANS) {
00193         Op1 wdp;
00194         rbh.begin();
00195         wdp.alpha   = alpha;
00196         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00197         wdp.numBlockRows = numBlockRows_;
00198         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00199         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00200         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00201         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00202         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00203         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00204         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00205         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00206         wdp.xstride = X.getStride();
00207         wdp.ystride = Y.getStride();
00208         rbh.end();
00209         const size_t numRHS = X.getNumCols();
00210         node_->template parallel_for<Op1>(0,numBlockRows_*numRHS,wdp);
00211       }
00212       else {
00213         //start by initializing Y = beta*Y
00214         DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00215         Op1T wdp;
00216         rbh.begin();
00217         wdp.alpha   = alpha;
00218         wdp.numBlockRows = numBlockRows_;
00219         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00220         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00221         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00222         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00223         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00224         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00225         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00226         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00227         wdp.xstride = X.getStride();
00228         wdp.ystride = Y.getStride();
00229         rbh.end();
00230         const size_t numRHS = X.getNumCols();
00231         node_->template parallel_for<Op1T>(0,numRHS,wdp);
00232       }
00233     }
00234     else {
00235       throw std::runtime_error("DefaultBlockSparseOps ERROR, not implemented for non-packed '2D' storage.");
00236     }
00237   }
00238 
00239 
00240   template <class Scalar, class Ordinal, class Node>
00241   template <class DomainScalar, class RangeScalar>
00242   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::multiply(
00243                                 Teuchos::ETransp trans, 
00244                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, 
00245                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const {
00246     typedef DefaultBlockSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1;
00247     typedef DefaultBlockSparseMultiplyOp1Transpose<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1T;
00248     TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00249         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00250     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00251     ReadyBufferHelper<Node> rbh(node_);
00252     if (isEmpty_ == true) {
00253       // Y <= 0 * X
00254       //   <= 0
00255       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
00256     }
00257     else if (isPacked_ == true) {
00258       if (trans == Teuchos::NO_TRANS) {
00259         Op1 wdp;
00260         rbh.begin();
00261         wdp.alpha   = alpha;
00262         wdp.beta    = beta;
00263         wdp.numBlockRows = numBlockRows_;
00264         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00265         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00266         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00267         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00268         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00269         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00270         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00271         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00272         wdp.xstride = X.getStride();
00273         wdp.ystride = Y.getStride();
00274         rbh.end();
00275         const size_t numRHS = X.getNumCols();
00276         node_->template parallel_for<Op1>(0,numBlockRows_*numRHS,wdp);
00277       }
00278       else {
00279         //start by initializing Y = beta*Y
00280         if (beta == Teuchos::ScalarTraits<RangeScalar>::zero()) {
00281           DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00282         }
00283         else {
00284           DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta,Y);
00285         }
00286         Op1T wdp;
00287         rbh.begin();
00288         wdp.alpha   = alpha;
00289         wdp.numBlockRows = numBlockRows_;
00290         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00291         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00292         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00293         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00294         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00295         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00296         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00297         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00298         wdp.xstride = X.getStride();
00299         wdp.ystride = Y.getStride();
00300         rbh.end();
00301         const size_t numRHS = X.getNumCols();
00302         node_->template parallel_for<Op1T>(0,numRHS,wdp);
00303       }
00304     }
00305     else {
00306       throw std::runtime_error("DefaultBlockSparseOps::Multiply ERROR, not implemented for non-packed '2D' storage.");
00307     }
00308   }
00309 
00310   template <class Scalar, class Ordinal, class Node>
00311   template <class DomainScalar, class RangeScalar>
00312   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::solve(
00313                                 Teuchos::ETransp trans, 
00314                                 Teuchos::EUplo triang, 
00315                                 Teuchos::EDiag diag, 
00316                                 const MultiVector<DomainScalar,Node> &Y, 
00317                                 MultiVector<RangeScalar,Node> &X) const {
00318     typedef DefaultBlockSparseSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op;
00319     typedef DefaultBlockSparseTransposeSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  OpT;
00320     TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00321         Teuchos::typeName(*this) << "::solve(): operation not fully initialized.");
00322     TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00323     ReadyBufferHelper<Node> rbh(node_);
00324     if (isEmpty_ == true) {
00325       // X <= Y
00326       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00327     }
00328     else if (isPacked_ == true) {
00329       if (trans == Teuchos::NO_TRANS) {
00330         Op wdp;
00331         rbh.begin();
00332         wdp.upper   = (triang == Teuchos::UPPER_TRI);
00333         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG);
00334         wdp.numBlockRows = numBlockRows_;
00335         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00336         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00337         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00338         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00339         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00340         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00341         wdp.x    = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00342         wdp.y    = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00343         wdp.xstride = X.getStride();
00344         wdp.ystride = Y.getStride();
00345         rbh.end();
00346         const size_t numRHS = X.getNumCols();
00347         node_->template parallel_for<Op>(0,numRHS,wdp);
00348       }
00349       else {
00350         OpT wdp;
00351         rbh.begin();
00352         wdp.upper   = (triang == Teuchos::UPPER_TRI);
00353         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG);
00354         wdp.numBlockRows = numBlockRows_;
00355         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00356         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00357         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00358         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00359         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00360         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00361         wdp.x    = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00362         wdp.y    = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00363         wdp.xstride = X.getStride();
00364         wdp.ystride = Y.getStride();
00365         rbh.end();
00366         const size_t numRHS = X.getNumCols();
00367         node_->template parallel_for<OpT>(0,numRHS,wdp);
00368       }
00369     }
00370     else {
00371       throw std::runtime_error("DefaultBlockSparseOps::solve ERROR, not implemented for non-packed '2D' storage.");
00372     }
00373   }
00374 
00375 } // namespace Kokkos
00376 
00377 #endif /* KOKKOS_DEFAULTSPARSEOPS_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends