Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultBlockSparseOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTBLOCKSPARSEOPS_HPP
00043 #define KOKKOS_DEFAULTBLOCKSPARSEOPS_HPP
00044 
00045 #include <Teuchos_ArrayRCP.hpp>
00046 #include <Teuchos_DataAccess.hpp>
00047 #include <Teuchos_Assert.hpp>
00048 #include <Teuchos_TypeNameTraits.hpp>
00049 #include <Teuchos_BLAS_types.hpp>
00050 #include <stdexcept>
00051 
00052 #include "Kokkos_ConfigDefs.hpp"
00053 #include "Kokkos_VbrMatrix.hpp" 
00054 #include "Kokkos_MultiVector.hpp"
00055 #include "Kokkos_NodeHelpers.hpp"
00056 #include "Kokkos_DefaultArithmetic.hpp"
00057 #include "Kokkos_DefaultBlockSparseKernelOps.hpp"
00058 
00059 namespace Kokkos {
00060 
00061   // default implementation
00062   template <class Scalar, class Ordinal, class Node = DefaultNode::DefaultNodeType>
00066   class DefaultBlockSparseOps {
00067   public:
00068     typedef Scalar  ScalarType;
00069     typedef Ordinal OrdinalType;
00070     typedef Node    NodeType;
00071 
00073 
00075 
00077     DefaultBlockSparseOps(const RCP<Node> &node = DefaultNode::getDefaultNode());
00078 
00080     ~DefaultBlockSparseOps();
00081 
00083 
00085 
00086     
00088     RCP<Node> getNode() const;
00089 
00091 
00093 
00095 
00097     void initializeValues(const VbrMatrix<Scalar,Ordinal,Node> &matrix);
00098 
00100     void clear();
00101 
00103 
00105 
00107 
00109     template <class DomainScalar, class RangeScalar>
00110     void multiply(Teuchos::ETransp trans, RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, MultiVector<RangeScalar,Node> &Y) const;
00111 
00113     template <class DomainScalar, class RangeScalar>
00114     void multiply(Teuchos::ETransp trans, 
00115                   RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const;
00116 
00118     template <class DomainScalar, class RangeScalar>
00119     void solve(Teuchos::ETransp trans, Teuchos::EUplo triang, Teuchos::EDiag diag,
00120             const MultiVector<DomainScalar,Node>& Y, MultiVector<RangeScalar,Node>& X) const;
00122 
00123   protected:
00125     DefaultBlockSparseOps(const DefaultBlockSparseOps& source);
00126 
00127     RCP<Node> node_;
00128 
00129     ArrayRCP<const Ordinal> pbuf_rptr_;
00130     ArrayRCP<const Ordinal> pbuf_cptr_;
00131     ArrayRCP<const size_t> pbuf_bptr_;
00132     ArrayRCP<const Ordinal> pbuf_bindx_;
00133     ArrayRCP<const Ordinal> pbuf_indx_;
00134     ArrayRCP<const Scalar>  pbuf_vals1D_;
00135 
00136     size_t numBlockRows_;
00137     bool valsInit_, isPacked_, isEmpty_;
00138   };
00139 
00140   template<class Scalar, class Ordinal, class Node>
00141   DefaultBlockSparseOps<Scalar,Ordinal,Node>::DefaultBlockSparseOps(const RCP<Node> &node)
00142   : node_(node)
00143   , valsInit_(false)
00144   , isPacked_(false)
00145   , isEmpty_(false) {
00146   }
00147 
00148   template<class Scalar, class Ordinal, class Node>
00149   DefaultBlockSparseOps<Scalar,Ordinal,Node>::~DefaultBlockSparseOps() {
00150   }
00151 
00152   template <class Scalar, class Ordinal, class Node>
00153   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::initializeValues(const VbrMatrix<Scalar,Ordinal,Node> &matrix) {
00154     isEmpty_ = false;
00155     pbuf_vals1D_ = matrix.get_values();
00156     pbuf_rptr_ = matrix.get_rptr();
00157     pbuf_cptr_ = matrix.get_cptr();
00158     pbuf_bptr_ = matrix.get_bptr();
00159     pbuf_bindx_ = matrix.get_bindx();
00160     pbuf_indx_ = matrix.get_indx();
00161     numBlockRows_ = matrix.getNumBlockRows();
00162     valsInit_ = true;
00163     isPacked_ = true;
00164   }
00165 
00166 
00167   template <class Scalar, class Ordinal, class Node>
00168   RCP<Node> DefaultBlockSparseOps<Scalar,Ordinal,Node>::getNode() const { 
00169     return node_; 
00170   }
00171 
00172 
00173   template <class Scalar, class Ordinal, class Node>
00174   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::clear() {
00175     pbuf_vals1D_  = null;
00176     pbuf_rptr_    = null;
00177     pbuf_cptr_    = null;
00178     pbuf_bptr_    = null;
00179     pbuf_bindx_   = null;
00180     pbuf_indx_    = null;
00181     valsInit_ = false;
00182     isPacked_ = false;
00183     isEmpty_  = false;
00184   }
00185 
00186   template <class Scalar, class Ordinal, class Node>
00187   template <class DomainScalar, class RangeScalar>
00188   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::multiply(
00189                                 Teuchos::ETransp trans, 
00190                                 RangeScalar alpha,
00191                                 const MultiVector<DomainScalar,Node> &X, 
00192                                 MultiVector<RangeScalar,Node> &Y) const {
00193     typedef DefaultBlockSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1;
00194     typedef DefaultBlockSparseMultiplyOp1Transpose<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1T;
00195     TEUCHOS_TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00196         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00197     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00198     ReadyBufferHelper<Node> rbh(node_);
00199     if (isEmpty_ == true) {
00200       // Y <= 0 * X
00201       //   <= 0
00202       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00203     }
00204     else if (isPacked_ == true) {
00205       if (trans == Teuchos::NO_TRANS) {
00206         Op1 wdp;
00207         rbh.begin();
00208         wdp.alpha   = alpha;
00209         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
00210         wdp.numBlockRows = numBlockRows_;
00211         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00212         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00213         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00214         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00215         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00216         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00217         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00218         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00219         wdp.xstride = X.getStride();
00220         wdp.ystride = Y.getStride();
00221         rbh.end();
00222         const size_t numRHS = X.getNumCols();
00223         node_->template parallel_for<Op1>(0,numBlockRows_*numRHS,wdp);
00224       }
00225       else {
00226         //start by initializing Y = beta*Y
00227         DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00228         Op1T wdp;
00229         rbh.begin();
00230         wdp.alpha   = alpha;
00231         wdp.numBlockRows = numBlockRows_;
00232         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00233         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00234         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00235         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00236         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00237         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00238         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00239         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00240         wdp.xstride = X.getStride();
00241         wdp.ystride = Y.getStride();
00242         rbh.end();
00243         const size_t numRHS = X.getNumCols();
00244         node_->template parallel_for<Op1T>(0,numRHS,wdp);
00245       }
00246     }
00247     else {
00248       throw std::runtime_error("DefaultBlockSparseOps ERROR, not implemented for non-packed '2D' storage.");
00249     }
00250   }
00251 
00252 
00253   template <class Scalar, class Ordinal, class Node>
00254   template <class DomainScalar, class RangeScalar>
00255   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::multiply(
00256                                 Teuchos::ETransp trans, 
00257                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X, 
00258                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const {
00259     typedef DefaultBlockSparseMultiplyOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1;
00260     typedef DefaultBlockSparseMultiplyOp1Transpose<Scalar,Ordinal,DomainScalar,RangeScalar>  Op1T;
00261     TEUCHOS_TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00262         Teuchos::typeName(*this) << "::multiply(): operation not fully initialized.");
00263     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00264     ReadyBufferHelper<Node> rbh(node_);
00265     if (isEmpty_ == true) {
00266       // Y <= 0 * X
00267       //   <= 0
00268       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
00269     }
00270     else if (isPacked_ == true) {
00271       if (trans == Teuchos::NO_TRANS) {
00272         Op1 wdp;
00273         rbh.begin();
00274         wdp.alpha   = alpha;
00275         wdp.beta    = beta;
00276         wdp.numBlockRows = numBlockRows_;
00277         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00278         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00279         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00280         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00281         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00282         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00283         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00284         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00285         wdp.xstride = X.getStride();
00286         wdp.ystride = Y.getStride();
00287         rbh.end();
00288         const size_t numRHS = X.getNumCols();
00289         node_->template parallel_for<Op1>(0,numBlockRows_*numRHS,wdp);
00290       }
00291       else {
00292         //start by initializing Y = beta*Y
00293         if (beta == Teuchos::ScalarTraits<RangeScalar>::zero()) {
00294           DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
00295         }
00296         else {
00297           DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta,Y);
00298         }
00299         Op1T wdp;
00300         rbh.begin();
00301         wdp.alpha   = alpha;
00302         wdp.numBlockRows = numBlockRows_;
00303         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00304         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00305         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00306         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00307         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00308         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00309         wdp.y    = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
00310         wdp.x    = rbh.template addConstBuffer<DomainScalar>(X.getValues());
00311         wdp.xstride = X.getStride();
00312         wdp.ystride = Y.getStride();
00313         rbh.end();
00314         const size_t numRHS = X.getNumCols();
00315         node_->template parallel_for<Op1T>(0,numRHS,wdp);
00316       }
00317     }
00318     else {
00319       throw std::runtime_error("DefaultBlockSparseOps::Multiply ERROR, not implemented for non-packed '2D' storage.");
00320     }
00321   }
00322 
00323   template <class Scalar, class Ordinal, class Node>
00324   template <class DomainScalar, class RangeScalar>
00325   void DefaultBlockSparseOps<Scalar,Ordinal,Node>::solve(
00326                                 Teuchos::ETransp trans, 
00327                                 Teuchos::EUplo triang, 
00328                                 Teuchos::EDiag diag, 
00329                                 const MultiVector<DomainScalar,Node> &Y, 
00330                                 MultiVector<RangeScalar,Node> &X) const {
00331     typedef DefaultBlockSparseSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  Op;
00332     typedef DefaultBlockSparseTransposeSolveOp1<Scalar,Ordinal,DomainScalar,RangeScalar>  OpT;
00333     TEUCHOS_TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error,
00334         Teuchos::typeName(*this) << "::solve(): operation not fully initialized.");
00335     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != Y.getNumCols());
00336     ReadyBufferHelper<Node> rbh(node_);
00337     if (isEmpty_ == true) {
00338       // X <= Y
00339       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00340     }
00341     else if (isPacked_ == true) {
00342       if (trans == Teuchos::NO_TRANS) {
00343         Op wdp;
00344         rbh.begin();
00345         wdp.upper   = (triang == Teuchos::UPPER_TRI);
00346         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG);
00347         wdp.numBlockRows = numBlockRows_;
00348         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00349         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00350         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00351         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00352         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00353         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00354         wdp.x    = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00355         wdp.y    = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00356         wdp.xstride = X.getStride();
00357         wdp.ystride = Y.getStride();
00358         rbh.end();
00359         const size_t numRHS = X.getNumCols();
00360         node_->template parallel_for<Op>(0,numRHS,wdp);
00361       }
00362       else {
00363         OpT wdp;
00364         rbh.begin();
00365         wdp.upper   = (triang == Teuchos::UPPER_TRI);
00366         wdp.unitDiag = (diag == Teuchos::UNIT_DIAG);
00367         wdp.numBlockRows = numBlockRows_;
00368         wdp.vals = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00369         wdp.rptr = rbh.template addConstBuffer<Ordinal>(pbuf_rptr_);
00370         wdp.cptr = rbh.template addConstBuffer<Ordinal>(pbuf_cptr_);
00371         wdp.bptr = rbh.template addConstBuffer<size_t>(pbuf_bptr_);
00372         wdp.bindx= rbh.template addConstBuffer<Ordinal>(pbuf_bindx_);
00373         wdp.indx = rbh.template addConstBuffer<Ordinal>(pbuf_indx_);
00374         wdp.x    = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00375         wdp.y    = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00376         wdp.xstride = X.getStride();
00377         wdp.ystride = Y.getStride();
00378         rbh.end();
00379         const size_t numRHS = X.getNumCols();
00380         node_->template parallel_for<OpT>(0,numRHS,wdp);
00381       }
00382     }
00383     else {
00384       throw std::runtime_error("DefaultBlockSparseOps::solve ERROR, not implemented for non-packed '2D' storage.");
00385     }
00386   }
00387 
00388 } // namespace Kokkos
00389 
00390 #endif /* KOKKOS_DEFAULTSPARSEOPS_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends