Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultSparseMultiplyKernelOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_DEFAULTSPARSEMULTIPLY_KERNELOPS_HPP
00030 #define KOKKOS_DEFAULTSPARSEMULTIPLY_KERNELOPS_HPP
00031 
00032 #ifndef KERNEL_PREFIX
00033 #define KERNEL_PREFIX
00034 #endif
00035 
00036 #ifdef __CUDACC__
00037 #include <Teuchos_ScalarTraitsCUDA.hpp>
00038 #else
00039 #include <Teuchos_ScalarTraits.hpp>
00040 #endif
00041 
00042 // TODO: begs/ends instead of offsets/rowsizes seems clever, as it allows the same code to be
00043 // use for packed and non-packed storage, by setting ends=begs+1. 
00044 // however, in the packed scenario, it requires an additional (redundant) N reads
00045 // this may become significant as NNZ -> N
00046 // this should be revisited; we may have to add DefaultSparseMultiplyPacked and DefaultSparseTransposeMultiplyPacked kernels
00047 
00048 namespace Kokkos {
00049 
00050   template <class Scalar, class Ordinal, class DomainScalar, class RangeScalar, int NO_BETA_AND_OVERWRITE>
00051   struct DefaultSparseMultiplyOp1 {
00052     // mat data
00053     const size_t  *begs;
00054     const size_t  *ends;
00055     const Ordinal *inds;
00056     const Scalar  *vals;
00057     // matvec params
00058     RangeScalar        alpha, beta;
00059     size_t numRows;
00060     // mv data
00061     const DomainScalar  *x;
00062     RangeScalar         *y;
00063     size_t xstride, ystride;
00064 
00065     inline KERNEL_PREFIX void execute(size_t i) {
00066       const size_t row = i % numRows;
00067       const size_t rhs = (i - row) / numRows;
00068       RangeScalar tmp = Teuchos::ScalarTraits<RangeScalar>::zero();
00069       const DomainScalar *xj = x + rhs * xstride;
00070       RangeScalar        *yj = y + rhs * ystride;
00071       for (size_t c=begs[row]; c != ends[row]; ++c) {
00072         tmp += (RangeScalar)vals[c] * (RangeScalar)xj[inds[c]];
00073       }
00074       if (NO_BETA_AND_OVERWRITE) {
00075         yj[row] = (RangeScalar)alpha * tmp;
00076       }
00077       else {
00078         RangeScalar tmp2 = beta * yj[row];
00079         yj[row] = (RangeScalar)(alpha * tmp + tmp2);
00080       }
00081     }
00082   };
00083 
00084   template <class Scalar, class Ordinal, class DomainScalar, class RangeScalar, int NO_BETA_AND_OVERWRITE>
00085   struct DefaultSparseTransposeMultiplyOp1 {
00086     // mat data
00087     const size_t  *begs;
00088     const size_t  *ends;
00089     const Ordinal *inds;
00090     const Scalar  *vals;
00091     // matvec params
00092     RangeScalar        alpha, beta;
00093     size_t numRows, numCols;
00094     // mv data
00095     const DomainScalar  *x;
00096     RangeScalar         *y;
00097     size_t xstride, ystride;
00098 
00099     inline KERNEL_PREFIX void execute(size_t i) {
00100       // multiply entire matrix for rhs i
00101       const size_t rhs = i;
00102       const DomainScalar *xj = x + rhs * xstride;
00103       const RangeScalar RANGE_ZERO = Teuchos::ScalarTraits<RangeScalar>::zero();
00104       RangeScalar        *yj = y + rhs * ystride;
00105       for (size_t row=0; row < numCols; ++row) {
00106         if (NO_BETA_AND_OVERWRITE) {
00107           yj[row] = RANGE_ZERO;
00108         }
00109         else {
00110           yj[row] = (RangeScalar)(yj[row] * beta);
00111         }
00112       }
00113       for (size_t row=0; row < numRows; ++row) {
00114         for (size_t c=begs[row]; c != ends[row]; ++c) {
00115           yj[inds[c]] += (RangeScalar)(alpha * Teuchos::ScalarTraits<RangeScalar>::conjugate(vals[c]) * (RangeScalar)xj[row]);
00116         }
00117       }
00118     }
00119   };
00120 
00121 
00122   template <class Scalar, class Ordinal, class DomainScalar, class RangeScalar, int NO_BETA_AND_OVERWRITE>
00123   struct DefaultSparseMultiplyOp2 {
00124     // mat data
00125     const Ordinal * const * inds_beg;
00126     const Scalar  * const * vals_beg;
00127     const size_t  *         numEntries;
00128     // matvec params
00129     RangeScalar        alpha, beta;
00130     size_t numRows;
00131     // mv data
00132     const DomainScalar  *x;
00133     RangeScalar         *y;
00134     size_t xstride, ystride;
00135 
00136     inline KERNEL_PREFIX void execute(size_t i) {
00137       const size_t row = i % numRows;
00138       const size_t rhs = (i - row) / numRows;
00139       RangeScalar tmp = Teuchos::ScalarTraits<RangeScalar>::zero();
00140       const DomainScalar *xj = x + rhs * xstride;
00141       RangeScalar        *yj = y + rhs * ystride;
00142       const Scalar  *curval = vals_beg[row];
00143       const Ordinal *curind = inds_beg[row];
00144       for (size_t j=0; j != numEntries[row]; ++j) {
00145         tmp += (RangeScalar)curval[j] * (RangeScalar)xj[curind[j]];
00146       }
00147       if (NO_BETA_AND_OVERWRITE) {
00148         yj[row] = (RangeScalar)alpha * tmp;
00149       }
00150       else {
00151         RangeScalar tmp2 = beta * yj[row];
00152         yj[row] = (RangeScalar)(alpha * tmp + tmp2);
00153       }
00154     }
00155   };
00156 
00157 
00158   template <class Scalar, class Ordinal, class DomainScalar, class RangeScalar, int NO_BETA_AND_OVERWRITE>
00159   struct DefaultSparseTransposeMultiplyOp2 {
00160     // mat data
00161     const Ordinal * const * inds_beg;
00162     const Scalar  * const * vals_beg;
00163     const size_t  *         numEntries;
00164     // matvec params
00165     RangeScalar        alpha, beta;
00166     size_t numRows, numCols;
00167     // mv data
00168     const DomainScalar  *x;
00169     RangeScalar         *y;
00170     size_t xstride, ystride;
00171 
00172     inline KERNEL_PREFIX void execute(size_t i) {
00173       // multiply entire matrix for rhs i
00174       const size_t rhs = i;
00175       const RangeScalar RANGE_ZERO = Teuchos::ScalarTraits<RangeScalar>::zero();
00176       const DomainScalar *xj = x + rhs * xstride;
00177       RangeScalar        *yj = y + rhs * ystride;
00178       for (size_t row=0; row < numCols; ++row) {
00179         if (NO_BETA_AND_OVERWRITE) {
00180           yj[row] = RANGE_ZERO;
00181         }
00182         else {
00183           yj[row] = (RangeScalar)(yj[row] * beta);
00184         }
00185       }
00186       for (size_t row=0; row < numRows; ++row) {
00187         const Scalar  *rowval = vals_beg[row];
00188         const Ordinal *rowind = inds_beg[row];
00189         for (size_t j=0; j != numEntries[row]; ++j) {
00190           yj[rowind[j]] += (RangeScalar)(alpha * Teuchos::ScalarTraits<RangeScalar>::conjugate(rowval[j]) * (RangeScalar)xj[row]);
00191         }
00192       }
00193     }
00194   };
00195 
00196 } // namespace Kokkos
00197 
00198 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends