Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultRelaxation.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTRELAXATION_HPP
00043 #define KOKKOS_DEFAULTRELAXATION_HPP
00044 
00045 #include <stdio.h>
00046 #include <stdexcept>
00047 
00048 #include <Teuchos_ArrayRCP.hpp>
00049 #include <Teuchos_DataAccess.hpp>
00050 #include <Teuchos_Assert.hpp>
00051 #include <Teuchos_TypeNameTraits.hpp>
00052 #include <Teuchos_ScalarTraits.hpp>
00053 
00054 #include "Kokkos_ConfigDefs.hpp"
00055 #include "Kokkos_MultiVector.hpp"
00056 #include "Kokkos_NodeHelpers.hpp"
00057 #include "Kokkos_DefaultArithmetic.hpp"
00058 #include "Kokkos_DefaultRelaxationKernelOps.hpp"
00059 #include "Kokkos_DefaultSparseOps.hpp"
00060 
00061 namespace Kokkos {
00062 
00070   template <class Scalar, class Ordinal, class Node>
00071   class DefaultRelaxation {
00072   public:
00073     typedef Scalar  ScalarType;
00074     typedef Ordinal OrdinalType;
00075     typedef Node    NodeType;
00076 
00078 
00080 
00082     DefaultRelaxation(const RCP<Node> &node = DefaultNode::getDefaultNode());
00083 
00085     ~DefaultRelaxation();
00086 
00088 
00090 
00091     
00093     RCP<Node> getNode() const;
00094 
00096 
00098 
00100 
00105     template <class UNSUPPORTED_GRAPH, class UNSUPPORTED_MATRIX>
00106     void initializeData(const RCP<const UNSUPPORTED_GRAPH > &graph, 
00107                         const RCP<const UNSUPPORTED_MATRIX> &matrix);
00108 
00110     void initializeData(const RCP<const DefaultCrsGraph<Ordinal,Node>         > &graph, 
00111                         const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &matrix);
00112 
00117     void setDiagonal(MultiVector<Scalar,Node> & diag);    
00118 
00120 
00122 
00124 
00126 
00128     void sweep_jacobi(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00129 
00130 #ifdef ENABLE_ALL_OTHER_RELAXATION
00131 
00132     void sweep_fine_hybrid(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00133 
00135     void sweep_coarse_hybrid(Scalar dampingFactor_, size_t num_chunks, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00136 
00137 #endif //ifdef ENABLE_ALL_OTHER_RELAXATION
00138 
00139     void setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min);
00140 
00142     void sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00143 
00145 
00146   protected:
00148     DefaultRelaxation(const DefaultRelaxation& source);
00149 
00151     // NTS: This should eventually disappear into some other Kokkos class...
00152     void ExtractDiagonal();    
00153 
00155     bool UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const;
00156 
00158     bool UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const;
00159 
00161     RCP<Node> node_;
00162 
00163     // packed: array of offsets, ordinals, values
00164     ArrayRCP<const Ordinal> inds_;
00165     ArrayRCP<const Ordinal> sml_ptrs_;
00166     ArrayRCP<const size_t>  big_ptrs_;
00167     ArrayRCP<const Scalar>  vals_;
00168 
00170     ArrayRCP<Scalar> diagonal_;
00171 
00173     mutable ArrayRCP<Scalar> tempJacobiVector_;
00174     mutable size_t lastNumJacobiVectors_;
00175 
00176     // Arrays containing temp vectors for Chebyshev
00177     mutable ArrayRCP<Scalar> tempChebyVectorX_;
00178     mutable ArrayRCP<Scalar> tempChebyVectorW_;
00179     mutable size_t lastNumChebyVectors_;
00180     mutable bool cheby_setup_done_;
00181     mutable bool first_cheby_iteration_;
00182 
00183     // Constants for Chebyshev
00184     mutable Scalar lmin_,lmax_,delta_,s1_,oneOverTheta_,rho_,rho_new_,dtemp1_,dtemp2_;
00185 
00186     Ordinal numRows_;
00187     bool isInit_, isEmpty_;
00188   }; //class DefaultRelaxation declaration
00189 
00190   /**********************************************************************/
00191   template<class Scalar, class Ordinal, class Node>
00192   DefaultRelaxation<Scalar,Ordinal,Node>::DefaultRelaxation(const RCP<Node> &node)
00193   : node_(node)
00194   , lastNumJacobiVectors_(0)
00195   , lastNumChebyVectors_(0)
00196   , cheby_setup_done_(false)
00197   , first_cheby_iteration_(false)  
00198   , isInit_(false)
00199   , isEmpty_(false)
00200   {
00201     lmin_=lmax_=delta_=s1_=oneOverTheta_=rho_=rho_new_=dtemp1_=dtemp2_=Teuchos::ScalarTraits<Scalar>::zero();
00202   }
00203 
00204   /**********************************************************************/
00205   template<class Scalar, class Ordinal, class Node>
00206   DefaultRelaxation<Scalar,Ordinal,Node>::~DefaultRelaxation() {
00207   }
00208 
00209   /**********************************************************************/
00210   template <class Scalar, class Ordinal, class Node>
00211   template <class UNSUPPORTED_GRAPH, class UNSUPPORTED_MATRIX>
00212   void DefaultRelaxation<Scalar,Ordinal,Node>::initializeData(
00213                         const RCP<const UNSUPPORTED_GRAPH > &graph,
00214                         const RCP<const UNSUPPORTED_MATRIX> &matrix) 
00215   {
00216     // not implemented for general graphs and matrices
00217     TEUCHOS_TEST_FOR_EXCEPT(true);
00218   }
00219 
00220   /**********************************************************************/
00221   template <class Scalar, class Ordinal, class Node>
00222   void DefaultRelaxation<Scalar,Ordinal,Node>::initializeData(
00223                 const RCP<const DefaultCrsGraph<Ordinal,Node>         > &graph, 
00224                 const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &matrix)
00225   {
00226     std::string tfecfFuncName("initializeData()");
00227     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00228         isInit_ == true,
00229         std::runtime_error, " structure already initialized."
00230     )
00231     numRows_ = graph->getNumRows();
00232     if (graph->isEmpty() || numRows_ == 0) {
00233       isEmpty_ = true;
00234     }
00235     else {
00236       isEmpty_ = false;
00237       big_ptrs_ = graph->getPointers();
00238       sml_ptrs_ = graph->getSmallPointers();
00239       inds_ = graph->getIndices();
00240       vals_ = matrix->getValues();
00241       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00242           inds_.size() != vals_.size(),
00243           std::runtime_error, " graph and matrix are not congruent."
00244       )
00245     }
00246     isInit_ = true;
00247     ExtractDiagonal();
00248   } //initializeData()
00249 
00250   /**********************************************************************/
00251   template <class Scalar, class Ordinal, class Node>
00252   RCP<Node> DefaultRelaxation<Scalar,Ordinal,Node>::getNode() const { 
00253     return node_; 
00254   }
00255 
00256   /**********************************************************************/
00257   // Sets the diagonal inverted for relaxation using a MultiVector
00258   template <class Scalar, class Ordinal, class Node>
00259   void DefaultRelaxation<Scalar,Ordinal,Node>::setDiagonal(MultiVector<Scalar,Node> & diag){
00260     // Allocate space for diagonal
00261     diagonal_ = arcp<Scalar>(numRows_);    
00262 
00263     // NTS: Copy diag over
00264 
00265     // Make it fail for now...
00266     TEUCHOS_TEST_FOR_EXCEPT(true);
00267   }
00268     
00269   /**********************************************************************/
00270   template <class Scalar, class Ordinal, class Node>
00271   void DefaultRelaxation<Scalar,Ordinal,Node>::ExtractDiagonal(){    
00272     TEUCHOS_TEST_FOR_EXCEPTION(isInit_ == false, std::runtime_error, 
00273         Teuchos::typeName(*this) << "::ExtractDiagonal(): initializeValues() hasn't been called.");
00274 
00275     // Allocate space for diagonal
00276     diagonal_ = arcp<Scalar>(numRows_);    
00277       
00278     // Extract the diagonal 
00279     if (big_ptrs_ != null) {
00280       typedef ExtractDiagonalOp<Scalar,size_t,Ordinal>  Op;
00281       ReadyBufferHelper<Node> rbh(node_);
00282       Op wdp;
00283       rbh.begin();
00284       wdp.numRows = numRows_;
00285       wdp.ptrs    = rbh.template addConstBuffer<size_t>(big_ptrs_);
00286       wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00287       wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00288       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00289       rbh.end();
00290       node_->template parallel_for<Op>(0,numRows_,wdp);
00291     }
00292     else {
00293       typedef ExtractDiagonalOp<Scalar,Ordinal,Ordinal>  Op;
00294       ReadyBufferHelper<Node> rbh(node_);
00295       Op wdp;
00296       rbh.begin();
00297       wdp.numRows = numRows_;
00298       wdp.ptrs    = rbh.template addConstBuffer<Ordinal>(sml_ptrs_);
00299       wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00300       wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00301       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00302       rbh.end();
00303       node_->template parallel_for<Op>(0,numRows_,wdp);
00304     }
00305   } //ExtractDiagonal()
00306 
00307   /**********************************************************************/
00308   template <class Scalar, class Ordinal, class Node>
00309   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const{
00310     // Re-allocate memory if needed
00311     if(num_vectors > lastNumJacobiVectors_){
00312       tempJacobiVector_= null;
00313       lastNumJacobiVectors_=num_vectors;
00314       tempJacobiVector_ = arcp<Scalar>(vec_leng*lastNumJacobiVectors_); 
00315       return true;
00316     }
00317     return false;
00318   } //UpdateJacobiTemp()
00319 
00320   /**********************************************************************/
00321   template <class Scalar, class Ordinal, class Node>
00322   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const{
00323     // Re-allocate memory if needed
00324     if(num_vectors > lastNumChebyVectors_){
00325       tempChebyVectorW_= null;
00326       tempChebyVectorX_= null;
00327       lastNumChebyVectors_ = num_vectors;
00328       tempChebyVectorW_ = arcp<Scalar>(numRows_*lastNumChebyVectors_);
00329       tempChebyVectorX_ = arcp<Scalar>(vec_leng*lastNumChebyVectors_); 
00330       return true;
00331     }
00332     return false;
00333   } //UpdateChebyTemp()
00334 
00335 
00336 #ifdef ENABLE_ALL_OTHER_RELAXATION
00337   /**********************************************************************/
00338   template <class Scalar, class Ordinal, class Node>
00339   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_fine_hybrid(Scalar dampingFactor_,
00340                          MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00341 
00342     TEUCHOS_TEST_FOR_EXCEPTION(isInit_ == false, std::runtime_error,
00343         Teuchos::typeName(*this) << "::sweep_fine_hybrid(): operation not fully initialized.");
00344     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00345     ReadyBufferHelper<Node> rbh(node_);
00346     if (isEmpty_ == true) {
00347       // This makes no sense to try to call ...
00348       TEUCHOS_TEST_FOR_EXCEPT(true);
00349     }
00350     else {
00351       if (big_ptrs_ != null) {
00352         typedef DefaultFineGrainHybridGaussSeidelOp<Scalar,size_t,Ordinal>  Op;
00353         Op wdp;
00354         rbh.begin();
00355         wdp.numRows = numRows_;
00356         wdp.ptrs    = rbh.template addConstBuffer<size_t>(big_ptrs_);
00357         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00358         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00359         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00360         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00361         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00362         wdp.damping_factor = dampingFactor_;
00363         wdp.xstride = X.getStride();
00364         wdp.bstride = B.getStride();
00365         rbh.end();
00366         const size_t numRHS = X.getNumCols();
00367         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00368       }
00369       else {
00370         typedef DefaultFineGrainHybridGaussSeidelOp<Scalar,Ordinal,Ordinal>  Op;
00371         Op wdp;
00372         rbh.begin();
00373         wdp.numRows = numRows_;
00374         wdp.ptrs    = rbh.template addConstBuffer<Ordinal>(sml_ptrs_);
00375         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00376         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00377         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00378         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00379         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00380         wdp.damping_factor = dampingFactor_;
00381         wdp.xstride = X.getStride();
00382         wdp.bstride = B.getStride();
00383         rbh.end();
00384         const size_t numRHS = X.getNumCols();
00385         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00386       }
00387     }
00388     return;
00389   } //sweep_fine_hybrid()
00390 
00391   /**********************************************************************/
00392   template <class Scalar, class Ordinal, class Node>
00393   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_coarse_hybrid(Scalar dampingFactor_,size_t num_chunks,
00394                          MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00395     typedef DefaultCoarseGrainHybridGaussSeidelOp<Scalar,Ordinal>  Op;
00396 
00397     TEUCHOS_TEST_FOR_EXCEPTION(isInit_ == false, std::runtime_error,
00398         Teuchos::typeName(*this) << "::sweep_coarse_hybrid(): operation not fully initialized.");
00399     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00400     ReadyBufferHelper<Node> rbh(node_);   
00401 
00402     if (isEmpty_ == true) {
00403       // This makes no sense to try to call ...
00404       TEUCHOS_TEST_FOR_EXCEPT(true);
00405     }
00406     else {
00407       Op wdp;
00408       rbh.begin();
00409       wdp.numRows = numRows_;
00410       wdp.numChunks = num_chunks;
00411       wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00412       wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00413       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00414       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00415       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00416       wdp.damping_factor = dampingFactor_;
00417       wdp.xstride = X.getStride();
00418       wdp.bstride = B.getStride();
00419       rbh.end();
00420       const size_t numRHS = X.getNumCols();
00421       node_->template parallel_for<Op>(0,num_chunks*numRHS,wdp);
00422     }
00423     return;
00424   } //sweep_coarse_hybrid()
00425 #endif //ifdef ENABLE_ALL_OTHER_RELAXATION
00426 
00427   
00428   /********************************************************************/
00429   template <class Scalar, class Ordinal, class Node>
00430   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_jacobi(Scalar dampingFactor_,
00431                          MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00432 
00433     TEUCHOS_TEST_FOR_EXCEPTION(isInit_ == false, std::runtime_error,
00434         Teuchos::typeName(*this) << "::sweep_jacobi(): operation not fully initialized.");
00435     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00436     ReadyBufferHelper<Node> rbh(node_);
00437 
00438     // Copy x over to the temp vector
00439     // NTS: The MultiVector copy constructor is a View. We need to do this the hard way.
00440     MultiVector<Scalar,Node> X0(X.getNode());
00441     UpdateJacobiTemp(X.getNumCols(),X.getNumRows());
00442     X0.initializeValues(X.getNumRows(),B.getNumCols(),tempJacobiVector_,B.getStride());
00443     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00444 
00445     if (isEmpty_ == true) {
00446       // This makes no sense to try to call ...
00447       TEUCHOS_TEST_FOR_EXCEPT(true);
00448     }
00449     else {
00450       if (big_ptrs_ != null) {
00451         typedef DefaultJacobiOp<Scalar,size_t,Ordinal>  Op;
00452         Op wdp;
00453         rbh.begin();
00454         wdp.numRows = numRows_;
00455         wdp.ptrs    = rbh.template addConstBuffer<size_t>(big_ptrs_);
00456         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00457         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00458         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00459         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00460         wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00461         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00462         wdp.damping_factor = dampingFactor_;
00463         wdp.xstride = X.getStride();
00464         wdp.bstride = B.getStride();
00465         rbh.end();
00466         const size_t numRHS = X.getNumCols();
00467         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00468       }
00469       else {
00470         typedef DefaultJacobiOp<Scalar,Ordinal,Ordinal>  Op;
00471         Op wdp;
00472         rbh.begin();
00473         wdp.numRows = numRows_;
00474         wdp.ptrs    = rbh.template addConstBuffer<Ordinal>(sml_ptrs_);
00475         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00476         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00477         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00478         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00479         wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00480         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00481         wdp.damping_factor = dampingFactor_;
00482         wdp.xstride = X.getStride();
00483         wdp.bstride = B.getStride();
00484         rbh.end();
00485         const size_t numRHS = X.getNumCols();
00486         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00487       }
00488       return;
00489     }
00490   } //sweep_jacobi()
00491 
00492   /********************************************************************/
00493   template <class Scalar, class Ordinal, class Node>
00494   void DefaultRelaxation<Scalar,Ordinal,Node>::setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min){
00495     lmax_=lambda_max;
00496     lmin_=lambda_min;
00497 
00498     Scalar alpha = lmin_;
00499     Scalar beta  = 1.1*lmax_;
00500     delta_ = 2.0 / (beta-alpha);
00501     Scalar theta_ = 0.5 * (beta+alpha);
00502     s1_    = theta_ * delta_;
00503     oneOverTheta_=1.0 / theta_;
00504     rho_=1.0/s1_;
00505 
00506     first_cheby_iteration_=true;
00507     cheby_setup_done_=true;
00508   } //setup_chebyshev()
00509 
00510 
00511   /********************************************************************/
00512   template <class Scalar, class Ordinal, class Node>
00513   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00514 
00515     TEUCHOS_TEST_FOR_EXCEPTION(isInit_ == false, std::runtime_error,
00516                Teuchos::typeName(*this) << "::sweep_chebyshev(): operation not fully initialized.");
00517     TEUCHOS_TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00518     //size_t xstride = X.getStride();
00519     //size_t bstride = B.getStride();
00520     //TEUCHOS_TEST_FOR_EXCEPT(xstride != bstride); //TODO JJH don't think we want this b/c X is imported, B is local
00521     TEUCHOS_TEST_FOR_EXCEPT(cheby_setup_done_ == false);
00522     
00523     ReadyBufferHelper<Node> rbh(node_);
00524     
00525     // If the number of multivectors has changed, we need to run Cheby from scratch
00526     first_cheby_iteration_= UpdateChebyTemp(X.getNumCols(),X.getNumRows()) || first_cheby_iteration_;
00527     
00528     // Copy X to X0 so we get the matvec correct
00529     MultiVector<Scalar,Node> X0(X.getNode());
00530     X0.initializeValues(X.getNumRows(),X.getNumCols(),tempChebyVectorX_,X.getStride());
00531     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00532 
00533     // Update Scalars 
00534     if(!first_cheby_iteration_){
00535       rho_new_ = 1.0 / (2.0*s1_ - rho_);
00536       dtemp1_=rho_*rho_new_;
00537       dtemp2_=2*rho_new_*delta_;
00538       rho_=rho_new_;
00539     }    
00540     
00541     if (isEmpty_ == true) {
00542       // This makes no sense to try to call ...
00543       TEUCHOS_TEST_FOR_EXCEPT(true);
00544     }
00545     else {
00546       if (big_ptrs_ != null) {
00547         typedef DefaultChebyshevOp<Scalar,size_t,Ordinal>  Op;
00548         Op wdp;
00549         rbh.begin();
00550         wdp.numRows = numRows_;
00551         wdp.ptrs    = rbh.template addConstBuffer<size_t>(big_ptrs_);
00552         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00553         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00554         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00555         wdp.x0      = rbh.template addConstBuffer<Scalar>(tempChebyVectorX_);
00556         wdp.w       = rbh.template addNonConstBuffer<Scalar>(tempChebyVectorW_);
00557         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00558         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00559         wdp.stride = X.getStride();
00560         wdp.first_step=first_cheby_iteration_;
00561         wdp.zero_initial_guess=false;//HAQ Fix me later
00562         wdp.oneOverTheta = oneOverTheta_;
00563         wdp.dtemp1 = dtemp1_;
00564         wdp.dtemp2 = dtemp2_;
00565 
00566         rbh.end();    
00567         const size_t numRHS = X.getNumCols();
00568         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00569       }
00570       else {
00571         typedef DefaultChebyshevOp<Scalar,Ordinal,Ordinal>  Op;
00572         Op wdp;
00573         rbh.begin();
00574         wdp.numRows = numRows_;
00575         wdp.ptrs    = rbh.template addConstBuffer<Ordinal>(sml_ptrs_);
00576         wdp.inds    = rbh.template addConstBuffer<Ordinal>(inds_);
00577         wdp.vals    = rbh.template addConstBuffer<Scalar>(vals_);
00578         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00579         wdp.x0      = rbh.template addConstBuffer<Scalar>(tempChebyVectorX_);
00580         wdp.w       = rbh.template addNonConstBuffer<Scalar>(tempChebyVectorW_);
00581         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00582         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00583         wdp.stride = X.getStride();
00584         wdp.first_step=first_cheby_iteration_;
00585         wdp.zero_initial_guess=false;//HAQ Fix me later
00586         wdp.oneOverTheta = oneOverTheta_;
00587         wdp.dtemp1 = dtemp1_;
00588         wdp.dtemp2 = dtemp2_;
00589 
00590         rbh.end();    
00591         const size_t numRHS = X.getNumCols();
00592         node_->template parallel_for<Op>(0,numRows_*numRHS,wdp);
00593       }
00594     }
00595     
00596     first_cheby_iteration_=false;
00597 
00598   } //sweep_chebyshev()
00599 
00600 } // namespace Kokkos
00601 
00602 #endif // KOKKOS_DEFAULTRELAXATION_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends