Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultRelaxationKernelOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTRELAXATION_KERNELOPS_HPP
00043 #define KOKKOS_DEFAULTRELAXATION_KERNELOPS_HPP
00044 
00045 #ifndef KERNEL_PREFIX
00046 #define KERNEL_PREFIX
00047 #endif
00048 
00049 #ifdef __CUDACC__
00050 #include <Teuchos_ScalarTraitsCUDA.hpp>
00051 #else
00052 #include <Teuchos_ScalarTraits.hpp>
00053 #endif
00054 
00055 namespace Kokkos {
00056 
00057   // Extract Matrix Diagonal for Type 1 storage
00058   template <class Scalar, class Ordinal>
00059   struct ExtractDiagonalOp {
00060 
00061     // mat data
00062     size_t numRows;
00063     const size_t  * ptrs;
00064     const Ordinal * inds;
00065     const Scalar  * vals;
00066     Scalar * diag;
00067 
00068     inline KERNEL_PREFIX void execute(size_t row) {
00069       for (size_t c=ptrs[row]; c != ptrs[row+1]; ++c) {
00070         if (row==(size_t)inds[c]) {
00071           diag[row]=vals[c];
00072           break;
00073         }
00074       }
00075     }
00076   };
00077 
00078 
00079   /************************************************************************************/
00080   /********************************* Jacobi Kernels ***********************************/
00081   /************************************************************************************/
00082   template <class Scalar, class Ordinal>
00083   struct DefaultJacobiOp {
00084     size_t numRows;
00085     const size_t  *ptrs;
00086     const Ordinal *inds;
00087     const Scalar  *vals;
00088     const Scalar  *diag;
00089     // vector data (including multiple rhs)
00090     Scalar       *x;
00091     const Scalar *x0;
00092     const Scalar *b;
00093     Scalar damping_factor;
00094     size_t xstride, bstride;
00095 
00096     inline KERNEL_PREFIX void execute(size_t i) {
00097       const size_t row  = i % numRows;
00098       const size_t rhs  = (i - row) / numRows;
00099       Scalar       *xj  = x + rhs * xstride;
00100       const Scalar *x0j = x0 + rhs * xstride;
00101       const Scalar *bj  = b + rhs * bstride;
00102 
00103       Scalar tmp = bj[row];
00104       for (size_t c=ptrs[row]; c<ptrs[row+1]; ++c) {
00105         tmp -= vals[c] * x0j[inds[c]];
00106       }
00107       xj[row]=x0j[row]+damping_factor*tmp/diag[row];
00108     }
00109   };
00110 
00111 
00112   /************************************************************************************/
00113   /************************ Fine-Grain Gauss-Seidel Kernels ***************************/
00114   /************************************************************************************/
00115 
00116   // Note: This is actually real Gauss-Seidel for a serial node, and hybrid for almost any other kind of node.
00117   template <class Scalar, class Ordinal>
00118   struct DefaultFineGrainHybridGaussSeidelOp {
00119     const size_t  *ptrs;
00120     const Ordinal *inds;
00121     const Scalar  *vals;
00122     const Scalar  *diag;
00123     size_t numRows;
00124     // vector data (including multiple rhs)
00125     Scalar       *x;
00126     const Scalar *b;
00127     Scalar damping_factor;
00128     size_t xstride, bstride;
00129 
00130     inline KERNEL_PREFIX void execute(size_t i) {
00131       const size_t row = i % numRows;
00132       const size_t rhs = (i - row) / numRows;
00133       Scalar       *xj = x + rhs * xstride;
00134       const Scalar *bj = b + rhs * bstride;
00135       Scalar tmp = bj[row];
00136       for (size_t c=ptrs[row];c<ptrs[row+1];c++) {
00137         tmp -= vals[c] * xj[inds[c]];
00138       }
00139       xj[row]+=damping_factor*tmp/diag[row];
00140     }
00141   };
00142 
00143 
00144   /************************************************************************************/
00145   /************************ Coarse-Grain Gauss-Seidel Kernels *************************/
00146   /************************************************************************************/
00147 
00148   // Coarse-grain "hybrid" Gauss-Seidel for Type 1 storage.
00149   // Note: This is actually real Gauss-Seidel for a serial node, and hybrid for almost any other kind of node.
00150   template <class Scalar, class Ordinal>
00151   struct DefaultCoarseGrainHybridGaussSeidelOp1 {
00152     const size_t  *ptrs;
00153     const Ordinal *inds;
00154     const Scalar  *vals;
00155     const Scalar  *diag;
00156     size_t numRows;
00157     size_t numChunks;
00158     // vector data (including multiple rhs)
00159     Scalar       *x;
00160     const Scalar *b;
00161     Scalar damping_factor;
00162     size_t xstride, bstride;
00163 
00164     inline KERNEL_PREFIX void execute(size_t i) {
00165       const size_t chunk = i % numChunks;
00166       const size_t rhs = (i - chunk) / numChunks;
00167       const size_t start_r = chunk * numRows / numChunks;
00168       const size_t stop_r  = TEUCHOS_MIN((chunk+1)*numRows/numChunks,numRows);
00169       Scalar       *xj = x + rhs * xstride;
00170       const Scalar *bj = b + rhs * bstride;
00171       for (size_t row=start_r;row<stop_r;row++){
00172         Scalar tmp = bj[row];
00173         for (size_t c=ptrs[row];c<ptrs[row+1];c++) {
00174           tmp -= vals[c] * xj[inds[c]];
00175         }
00176         xj[row]+=damping_factor*tmp/diag[row];
00177       }
00178     }
00179   };
00180 
00181   /************************************************************************************/
00182   /******************************** Chebyshev Kernels *********************************/
00183   /************************************************************************************/
00184 
00185   template <class Scalar, class Ordinal>
00186   struct DefaultChebyshevOp {
00187     const size_t  *ptrs;
00188     const Ordinal *inds;
00189     const Scalar  *vals;
00190     const Scalar  *diag;
00191     size_t numRows;
00192     // vector data (including multiple rhs)
00193     Scalar       *x,*w;
00194     const Scalar *x0,*b;
00195     Scalar oneOverTheta,dtemp1,dtemp2;
00196     size_t stride;
00197     bool first_step;
00198     bool zero_initial_guess;
00199 
00200     inline KERNEL_PREFIX void execute(size_t i) {
00201       const size_t row  = i % numRows;
00202       const size_t rhs  = (i - row) / numRows;
00203       Scalar       *xj  = x + rhs * stride;
00204       const Scalar *x0j = x0 + rhs * stride;
00205       Scalar       *wj  = w + rhs * stride;      
00206       const Scalar *bj  = b + rhs * stride;
00207       Scalar        vj;
00208 
00209       if (first_step) {
00210         if(zero_initial_guess) {
00211           // x= theta^{-1} D^{-1} b
00212           xj[row]=wj[row]=bj[row] / diag[row] *oneOverTheta;
00213         } else {
00214           // v=Ax
00215           vj=Teuchos::ScalarTraits<Scalar>::zero();
00216           for (size_t c=ptrs[row]; c<ptrs[row+1]; ++c) {
00217             vj += vals[c] * x0j[inds[c]];
00218           }
00219           // w=theta^{-1} D^{-1} (b -Ax)
00220           wj[row]=(bj[row]-vj)/diag[row]*oneOverTheta;
00221           // x+=w
00222           xj[row]+=wj[row];
00223         }
00224       } else {
00225         //v=Ax
00226         vj=Teuchos::ScalarTraits<Scalar>::zero();
00227         for (size_t c=ptrs[row]; c<ptrs[row+1]; ++c) {
00228           vj += vals[c] * x0j[inds[c]];
00229         }
00230         // w=dtemp1*w +  D^{-1}*dtemp2*(b-Ax)
00231         wj[row]=dtemp1*wj[row]+dtemp2*(bj[row]-vj)/diag[row];
00232         // x+=w
00233         xj[row]+=wj[row];
00234       }
00235 
00236       //      printf("[%3d-%d] x=%11.4e v=%11.4e w=%11.4e x0=%11.4e\n",row,first_step,xj[row],vj,wj[row],x0j[row]);
00237 
00238     }
00239 
00240   };
00241 
00242 }// namespace Kokkos
00243 
00244 #endif /* KOKKOS_DEFAULTRELAXATION_KERNELOPS_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends