Kokkos_DefaultRelaxation.hpp

00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_DEFAULTRELAXATION_HPP
00030 #define KOKKOS_DEFAULTRELAXATION_HPP
00031 
00032 #include <Teuchos_ArrayRCP.hpp>
00033 #include <Teuchos_DataAccess.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_TypeNameTraits.hpp>
00036 #include <Teuchos_ScalarTraits.hpp>
00037 #include <stdexcept>
00038 
00039 #include "Kokkos_ConfigDefs.hpp"
00040 #include "Kokkos_CrsMatrix.hpp" 
00041 #include "Kokkos_CrsGraph.hpp" 
00042 #include "Kokkos_MultiVector.hpp"
00043 #include "Kokkos_NodeHelpers.hpp"
00044 #include "Kokkos_DefaultArithmetic.hpp"
00045 #include "Kokkos_DefaultRelaxationKernelOps.hpp"
00046 
00047 #include <stdio.h>
00048 
00049 namespace Kokkos {
00050 
00056   template <class Scalar, class Ordinal, class Node = DefaultNode::DefaultNodeType>
00057   class DefaultRelaxation {
00058   public:
00059     typedef Scalar  ScalarType;
00060     typedef Ordinal OrdinalType;
00061     typedef Node    NodeType;
00062 
00064 
00066 
00068     DefaultRelaxation(const Teuchos::RCP<Node> &node = DefaultNode::getDefaultNode());
00069 
00071     ~DefaultRelaxation();
00072 
00074 
00076 
00077     
00079     Teuchos::RCP<Node> getNode() const;
00080 
00082 
00084 
00086 
00088     template <class GRAPH>
00089     Teuchos::DataAccess initializeStructure(GRAPH &graph, Teuchos::DataAccess cv);
00090 
00092     template <class MATRIX>
00093     Teuchos::DataAccess initializeValues(MATRIX &matrix, Teuchos::DataAccess cv);
00094 
00096     Teuchos::DataAccess initializeStructure(CrsGraph<Ordinal,Node> &graph, Teuchos::DataAccess cv);
00097 
00099     Teuchos::DataAccess initializeValues(CrsMatrix<Scalar,Node> &matrix, Teuchos::DataAccess cv);
00100 
00102     void setDiagonal(MultiVector<Scalar,Node> & diag);    
00103 
00105     void clear();
00106 
00108 
00110 
00112 
00114 
00116     void sweep_jacobi(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00117 
00119     void sweep_fine_hybrid(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00120 
00122     void sweep_coarse_hybrid(Scalar dampingFactor_, size_t num_chunks, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00123 
00125     void setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min);
00126 
00128     void sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00129 
00131 
00132   protected:
00134     DefaultRelaxation(const DefaultRelaxation& source);
00135 
00137     // NTS: This should eventually disappear into some other Kokkos class...
00138     void ExtractDiagonal();    
00139 
00140     // Update the temp vector size
00141     bool UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const;
00142     bool UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const;
00143 
00144     // My node
00145     Teuchos::RCP<Node> node_;
00146 
00147     // we do this one of two ways: 
00148     // 1D/packed: array of offsets, pointer for ordinals, pointer for values. obviously the smallest footprint.
00149     Teuchos::ArrayRCP<const Ordinal> pbuf_inds1D_;
00150     Teuchos::ArrayRCP<const size_t>  pbuf_offsets1D_;
00151     Teuchos::ArrayRCP<const Scalar>  pbuf_vals1D_;
00152     // 2D: array of pointers
00153     Teuchos::ArrayRCP<const Ordinal *> pbuf_inds2D_;
00154     Teuchos::ArrayRCP<const Scalar  *> pbuf_vals2D_;
00155     Teuchos::ArrayRCP<size_t>          pbuf_numEntries_;
00156     
00157     // Array containing matrix diagonal for easy access
00158     Teuchos::ArrayRCP<Scalar> diagonal_;
00159 
00160     // Array containing a temp vector for Jacobi
00161     mutable Teuchos::ArrayRCP<Scalar> tempJacobiVector_;
00162     mutable size_t lastNumJacobiVectors_;
00163 
00164     // Arrays containing temp vectors for Chebyshev
00165     mutable Teuchos::ArrayRCP<Scalar> tempChebyVectorX_;
00166     mutable Teuchos::ArrayRCP<Scalar> tempChebyVectorW_;
00167     mutable size_t lastNumChebyVectors_;
00168     mutable bool cheby_setup_done_;
00169     mutable bool first_cheby_iteration_;
00170 
00171     // Constants for Chebyshev
00172     mutable Scalar lmin_,lmax_,delta_,s1_,oneOverTheta_,rho_,rho_new_,dtemp1_,dtemp2_;
00173 
00174     size_t numRows_;
00175     bool indsInit_, valsInit_, isPacked_, isEmpty_;
00176   };
00177 
00178 
00179   /**********************************************************************/
00180   template<class Scalar, class Ordinal, class Node>
00181   DefaultRelaxation<Scalar,Ordinal,Node>::DefaultRelaxation(const Teuchos::RCP<Node> &node)
00182   : node_(node)
00183   , lastNumJacobiVectors_(0)
00184   , lastNumChebyVectors_(0)
00185   , cheby_setup_done_(false)
00186   , first_cheby_iteration_(false)  
00187   , indsInit_(false)
00188   , valsInit_(false)
00189   , isPacked_(false) 
00190   , isEmpty_(false)
00191   {
00192     lmin_=lmax_=delta_=s1_=oneOverTheta_=rho_=rho_new_=dtemp1_=dtemp2_=Teuchos::ScalarTraits<Scalar>::zero();
00193   }
00194 
00195   /**********************************************************************/
00196   template<class Scalar, class Ordinal, class Node>
00197   DefaultRelaxation<Scalar,Ordinal,Node>::~DefaultRelaxation() {
00198     clear();
00199   }
00200 
00201   /**********************************************************************/
00202   template<class Scalar, class Ordinal, class Node>
00203   template <class GRAPH>
00204   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeStructure(GRAPH &graph, Teuchos::DataAccess cv) {
00205     // not implemented for general sparse graphs
00206     TEST_FOR_EXCEPT(true);
00207   }
00208 
00209   /**********************************************************************/
00210   template<class Scalar, class Ordinal, class Node>
00211   template <class MATRIX>
00212   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeValues(MATRIX &graph, Teuchos::DataAccess cv) {
00213     // not implemented for general sparse matrices
00214     TEST_FOR_EXCEPT(true);
00215   }
00216 
00217   /**********************************************************************/
00218   template <class Scalar, class Ordinal, class Node>
00219   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeStructure(CrsGraph<Ordinal,Node> &graph, Teuchos::DataAccess cv) {
00220     using Teuchos::ArrayRCP;
00221     TEST_FOR_EXCEPTION(cv != Teuchos::View, std::runtime_error,
00222         Teuchos::typeName(*this) << "::initializeStructure(): requires View access.");
00223     TEST_FOR_EXCEPTION(indsInit_ == true, std::runtime_error, 
00224         Teuchos::typeName(*this) << "::initializeStructure(): structure already initialized.");
00225     numRows_ = graph.getNumRows();
00226     if (graph.isEmpty() || numRows_ == 0) {
00227       isEmpty_ = true;
00228     }
00229     else if (graph.isPacked()) {
00230       isEmpty_ = false;
00231       isPacked_ = true;
00232       pbuf_inds1D_    = graph.getPackedIndices();
00233       pbuf_offsets1D_ = graph.getPackedOffsets();
00234     }
00235     else {
00236       isEmpty_ = false;
00237       isPacked_ = false;
00238       pbuf_inds2D_     = node_->template allocBuffer<const Ordinal *>(numRows_);
00239       pbuf_numEntries_ = node_->template allocBuffer<size_t>(numRows_);
00240       ArrayRCP<const Ordinal *> inds2Dview = node_->template viewBufferNonConst<const Ordinal *>(WriteOnly, numRows_, pbuf_inds2D_);
00241       ArrayRCP<         size_t> numEntview = node_->template viewBufferNonConst<         size_t>(WriteOnly, numRows_, pbuf_numEntries_);
00242       for (size_t r=0; r < numRows_; ++r) {
00243         ArrayRCP<const Ordinal> rowinds = graph.get2DIndices(r);
00244         if (rowinds != Teuchos::null) {
00245           inds2Dview[r] = rowinds.getRawPtr();
00246           numEntview[r] = rowinds.size();
00247         }
00248         else {
00249           inds2Dview[r] = NULL;
00250           numEntview[r] = 0;
00251         }
00252       }
00253     }
00254     indsInit_ = true;
00255     return Teuchos::View;
00256   }
00257 
00258   /**********************************************************************/
00259   template <class Scalar, class Ordinal, class Node>
00260   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeValues(CrsMatrix<Scalar,Node> &matrix, Teuchos::DataAccess cv) {
00261     using Teuchos::ArrayRCP;
00262     TEST_FOR_EXCEPTION(cv != Teuchos::View, std::runtime_error,
00263         Teuchos::typeName(*this) << "::initializeValues(): requires View access.");
00264     TEST_FOR_EXCEPTION(valsInit_ == true, std::runtime_error, 
00265         Teuchos::typeName(*this) << "::initializeValues(): values already initialized.");
00266     TEST_FOR_EXCEPTION(numRows_ != matrix.getNumRows() || (!isEmpty_ && isPacked_ != matrix.isPacked()), std::runtime_error,
00267         Teuchos::typeName(*this) << "::initializeValues(): matrix not compatible with previously supplied graph.");
00268     if (isEmpty_ || matrix.isEmpty() || numRows_ == 0) {
00269       isEmpty_ = true;
00270     }
00271     else if (matrix.isPacked()) {
00272       isEmpty_ = false;
00273       pbuf_vals1D_ = matrix.getPackedValues();
00274     }
00275     else {
00276       isEmpty_ = false;
00277       pbuf_vals2D_ = node_->template allocBuffer<const Scalar *>(numRows_);
00278       ArrayRCP<const Scalar *> vals2Dview = node_->template viewBufferNonConst<const Scalar *>(WriteOnly, numRows_, pbuf_vals2D_);
00279       for (size_t r=0; r < numRows_; ++r) {
00280         ArrayRCP<const Scalar> rowvals = matrix.get2DValues(r);
00281         if (rowvals != Teuchos::null) {
00282           vals2Dview[r] = rowvals.getRawPtr();
00283         }
00284         else {
00285           vals2Dview[r] = NULL;
00286         }
00287       }
00288     }
00289     
00290     valsInit_ = true;
00291     ExtractDiagonal();
00292     return Teuchos::View;
00293   }
00294 
00295   /**********************************************************************/
00296   template <class Scalar, class Ordinal, class Node>
00297   Teuchos::RCP<Node> DefaultRelaxation<Scalar,Ordinal,Node>::getNode() const { 
00298     return node_; 
00299   }
00300 
00301   /**********************************************************************/
00302   template <class Scalar, class Ordinal, class Node>
00303   void DefaultRelaxation<Scalar,Ordinal,Node>::clear() {
00304     pbuf_inds1D_      = Teuchos::null;
00305     pbuf_offsets1D_   = Teuchos::null;
00306     pbuf_vals1D_      = Teuchos::null;
00307     pbuf_inds2D_      = Teuchos::null;
00308     pbuf_vals2D_      = Teuchos::null;
00309     pbuf_numEntries_  = Teuchos::null;
00310     diagonal_         = Teuchos::null;
00311     tempJacobiVector_ = Teuchos::null;
00312     tempChebyVectorW_ = Teuchos::null;
00313     tempChebyVectorX_ = Teuchos::null;
00314     indsInit_ = false;
00315     valsInit_ = false;
00316     isPacked_ = false;
00317     isEmpty_  = false;
00318     lastNumJacobiVectors_=0;
00319     lastNumChebyVectors_=0;
00320   }
00321 
00322   /**********************************************************************/
00323   // Sets the diagonal inverted for relaxation using a Kokkos::MultiVector
00324   template <class Scalar, class Ordinal, class Node>
00325   void DefaultRelaxation<Scalar,Ordinal,Node>::setDiagonal(MultiVector<Scalar,Node> & diag){
00326     // Allocate space for diagonal
00327     diagonal_ = node_->template allocBuffer<Scalar>(numRows_);    
00328 
00329     // NTS: Copy diag over
00330 
00331     // Make it fail for now...
00332     TEST_FOR_EXCEPT(true);
00333   }
00334     
00335   /**********************************************************************/
00336   template <class Scalar, class Ordinal, class Node>
00337   void DefaultRelaxation<Scalar,Ordinal,Node>::ExtractDiagonal(){    
00338     using Teuchos::ArrayRCP;
00339     TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error, 
00340         Teuchos::typeName(*this) << "::ExtractDiagonal(): initializeValues() hasn't been called.");
00341 
00342     // Allocate space for diagonal
00343     diagonal_ = node_->template allocBuffer<Scalar>(numRows_);    
00344       
00345     if (pbuf_vals1D_ != Teuchos::null){
00346       // Extract the diagonal for Type 1 storage
00347       typedef ExtractDiagonalOp1<Scalar,Ordinal>  Op1D;
00348       ReadyBufferHelper<Node> rbh(node_);
00349       Op1D wdp;
00350       rbh.begin();
00351       wdp.numRows = numRows_;
00352       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00353       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00354       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00355       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00356       rbh.end();
00357       node_->template parallel_for<Op1D>(0,numRows_,wdp);
00358     }
00359     else {
00360       // Extract the diagonal for Type 2 storage
00361       typedef ExtractDiagonalOp2<Scalar,Ordinal>  Op2D;
00362       ReadyBufferHelper<Node> rbh(node_);
00363       Op2D wdp;
00364       rbh.begin();
00365       wdp.numRows = numRows_;
00366       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00367       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00368       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00369       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00370       rbh.end();
00371       rbh.end();
00372       node_->template parallel_for<Op2D>(0,numRows_,wdp);
00373       
00374     }
00375   }
00376   /**********************************************************************/
00377   template <class Scalar, class Ordinal, class Node>
00378   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const{
00379     // Re-allocate memory if needed
00380     if(num_vectors > lastNumJacobiVectors_){
00381       tempJacobiVector_=Teuchos::null;
00382       lastNumJacobiVectors_=num_vectors;
00383       tempJacobiVector_ = node_->template allocBuffer<Scalar>(vec_leng*lastNumJacobiVectors_); 
00384       return true;
00385     }
00386     return false;
00387   }
00388 
00389   /**********************************************************************/
00390   template <class Scalar, class Ordinal, class Node>
00391   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const{
00392     // Re-allocate memory if needed
00393     if(num_vectors > lastNumChebyVectors_){
00394       tempChebyVectorW_=Teuchos::null;
00395       tempChebyVectorX_=Teuchos::null;
00396       lastNumChebyVectors_=num_vectors;
00397       tempChebyVectorW_ = node_->template allocBuffer<Scalar>(numRows_*lastNumChebyVectors_);
00398       tempChebyVectorX_ = node_->template allocBuffer<Scalar>(vec_leng*lastNumChebyVectors_); 
00399       return true;
00400     }
00401     return false;
00402   }
00403 
00404 
00405   /**********************************************************************/
00406   template <class Scalar, class Ordinal, class Node>
00407   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_fine_hybrid(Scalar dampingFactor_,
00408              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00409     typedef DefaultFineGrainHybridGaussSeidelOp1<Scalar,Ordinal>  Op1D;
00410     typedef DefaultFineGrainHybridGaussSeidelOp2<Scalar,Ordinal>  Op2D;
00411 
00412 
00413     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00414         Teuchos::typeName(*this) << "::sweep_fine_hybrid(): operation not fully initialized.");
00415     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00416     ReadyBufferHelper<Node> rbh(node_);
00417     if (isEmpty_ == true) {
00418       // This makes no sense to try to call ...
00419       TEST_FOR_EXCEPT(true);
00420     }
00421     else if (isPacked_ == true) {
00422       Op1D wdp;
00423       rbh.begin();
00424       wdp.numRows = numRows_;
00425       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00426       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00427       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00428       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00429       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00430       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00431       wdp.damping_factor = dampingFactor_;
00432       wdp.xstride = X.getStride();
00433       wdp.bstride = B.getStride();
00434       rbh.end();
00435       const size_t numRHS = X.getNumCols();
00436       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00437     }
00438     else {
00439       Op2D wdp;
00440       rbh.begin();
00441       wdp.numRows = numRows_;
00442       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00443       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00444       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00445       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00446       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00447       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00448       wdp.damping_factor = dampingFactor_;
00449       wdp.xstride = X.getStride();
00450       wdp.bstride = B.getStride();
00451       rbh.end();
00452       const size_t numRHS = X.getNumCols();
00453       node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00454     }
00455     return;
00456   }
00457 
00458   /**********************************************************************/
00459   template <class Scalar, class Ordinal, class Node>
00460   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_coarse_hybrid(Scalar dampingFactor_,size_t num_chunks,
00461              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00462     typedef DefaultCoarseGrainHybridGaussSeidelOp1<Scalar,Ordinal>  Op1D;
00463     typedef DefaultCoarseGrainHybridGaussSeidelOp2<Scalar,Ordinal>  Op2D;
00464 
00465 
00466     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00467         Teuchos::typeName(*this) << "::sweep_coarse_hybrid(): operation not fully initialized.");
00468     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00469     ReadyBufferHelper<Node> rbh(node_);   
00470 
00471     if (isEmpty_ == true) {
00472       // This makes no sense to try to call ...
00473       TEST_FOR_EXCEPT(true);
00474     }
00475     else if (isPacked_ == true) {
00476       Op1D wdp;
00477       rbh.begin();
00478       wdp.numRows = numRows_;
00479       wdp.numChunks = num_chunks;
00480       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00481       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00482       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00483       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00484       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00485       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00486       wdp.damping_factor = dampingFactor_;
00487       wdp.xstride = X.getStride();
00488       wdp.bstride = B.getStride();
00489       rbh.end();
00490       const size_t numRHS = X.getNumCols();
00491       node_->template parallel_for<Op1D>(0,num_chunks*numRHS,wdp);
00492     }
00493     else {
00494       Op2D wdp;
00495       rbh.begin();
00496       wdp.numRows = numRows_;
00497       wdp.numChunks = num_chunks;
00498       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00499       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00500       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00501       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00502       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00503       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00504       wdp.damping_factor = dampingFactor_;
00505       wdp.xstride = X.getStride();
00506       wdp.bstride = B.getStride();
00507       rbh.end();
00508       const size_t numRHS = X.getNumCols();
00509       node_->template parallel_for<Op2D>(0,num_chunks*numRHS,wdp);
00510     }
00511     return;
00512   }
00513 
00514   
00515   /********************************************************************/
00516   template <class Scalar, class Ordinal, class Node>
00517   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_jacobi(Scalar dampingFactor_,
00518              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00519     typedef DefaultJacobiOp1<Scalar,Ordinal>  Op1D;
00520     typedef DefaultJacobiOp2<Scalar,Ordinal>  Op2D;
00521 
00522 
00523     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00524         Teuchos::typeName(*this) << "::sweep_jacobi(): operation not fully initialized.");
00525     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00526     ReadyBufferHelper<Node> rbh(node_);
00527 
00528     // Copy x over to the temp vector
00529     // NTS: The MultiVector copy constructor is a View. We need to do this the hard way.
00530     MultiVector<Scalar,Node> X0(X.getNode());
00531     UpdateJacobiTemp(X.getNumCols(),X.getNumRows());
00532     X0.initializeValues(X.getNumRows(),B.getNumCols(),tempJacobiVector_,B.getStride());
00533     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00534 
00535     if (isEmpty_ == true) {
00536       // This makes no sense to try to call ...
00537       TEST_FOR_EXCEPT(true);
00538     }
00539     else if (isPacked_ == true) {
00540       Op1D wdp;
00541       rbh.begin();
00542       wdp.numRows = numRows_;
00543       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00544       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00545       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00546       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00547       wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00548       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00549       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00550       wdp.damping_factor = dampingFactor_;
00551       wdp.xstride = X.getStride();
00552       wdp.bstride = B.getStride();
00553       rbh.end();
00554       const size_t numRHS = X.getNumCols();
00555       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00556     }
00557     else {
00558       Op2D wdp;
00559       rbh.begin();
00560       wdp.numRows = numRows_;
00561       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00562       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00563       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00564       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00565       wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00566       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00567       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00568       wdp.damping_factor = dampingFactor_;
00569       wdp.xstride = X.getStride();
00570       wdp.bstride = B.getStride();
00571       rbh.end();
00572       const size_t numRHS = X.getNumCols();
00573       node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00574     }
00575     return;
00576   }
00577 
00578   /********************************************************************/
00579   template <class Scalar, class Ordinal, class Node>
00580   void DefaultRelaxation<Scalar,Ordinal,Node>::setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min){
00581     lmax_=lambda_max;
00582     lmin_=lambda_min;
00583 
00584     Scalar alpha = lmin_;
00585     Scalar beta  = 1.1*lmax_;
00586     delta_ = 2.0 / (beta-alpha);
00587     Scalar theta_ = 0.5 * (beta+alpha);
00588     s1_    = theta_ * delta_;
00589     oneOverTheta_=1.0 / theta_;
00590     rho_=1.0/s1_;
00591 
00592 
00593     first_cheby_iteration_=true;
00594     cheby_setup_done_=true;
00595   }
00596 
00597 
00598   /********************************************************************/
00599   template <class Scalar, class Ordinal, class Node>
00600   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00601     typedef DefaultChebyshevOp1<Scalar,Ordinal>  Op1D;
00602     //    typedef DefaultChebyOp2<Scalar,Ordinal>  Op2D;
00603     
00604     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00605            Teuchos::typeName(*this) << "::sweep_jacobi(): operation not fully initialized.");
00606     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00607     //size_t xstride = X.getStride();
00608     //size_t bstride = B.getStride();
00609     //TEST_FOR_EXCEPT(xstride != bstride); //TODO JJH don't think we want this b/c X is imported, B is local
00610     TEST_FOR_EXCEPT(cheby_setup_done_ == false);
00611     
00612     ReadyBufferHelper<Node> rbh(node_);
00613     
00614     // If the number of multivectors has changed, we need to run Cheby from scratch
00615     first_cheby_iteration_= UpdateChebyTemp(X.getNumCols(),X.getNumRows()) || first_cheby_iteration_;
00616     
00617     // Copy X to X0 so we get the matvec correct
00618     MultiVector<Scalar,Node> X0(X.getNode());
00619     X0.initializeValues(X.getNumRows(),X.getNumCols(),tempChebyVectorX_,X.getStride());
00620     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00621 
00622     // Update Scalars 
00623     if(!first_cheby_iteration_){
00624       rho_new_ = 1.0 / (2.0*s1_ - rho_);
00625       dtemp1_=rho_*rho_new_;
00626       dtemp2_=2*rho_new_*delta_;
00627       rho_=rho_new_;
00628     }    
00629     //    printf("    rho = %6.4e rho_new = %6.4e dtemp1=%6.4e dtemp2=%6.4e\n",rho_,rho_new_,dtemp1_,dtemp2_);
00630 
00631 
00632     
00633     if (isEmpty_ == true) {
00634       // This makes no sense to try to call ...
00635       TEST_FOR_EXCEPT(true);
00636     }
00637     else if (isPacked_ == true) {
00638       Op1D wdp;
00639       rbh.begin();
00640       wdp.numRows = numRows_;
00641       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00642       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00643       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00644       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00645       wdp.x0      = rbh.template addConstBuffer<Scalar>(tempChebyVectorX_);
00646       wdp.w       = rbh.template addNonConstBuffer<Scalar>(tempChebyVectorW_);
00647       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00648       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00649       wdp.stride = X.getStride();
00650       wdp.first_step=first_cheby_iteration_;
00651       wdp.zero_initial_guess=false;//HAQ Fix me later
00652       wdp.oneOverTheta = oneOverTheta_;
00653       wdp.dtemp1 = dtemp1_;
00654       wdp.dtemp2 = dtemp2_;
00655       
00656       rbh.end();    
00657       const size_t numRHS = X.getNumCols();
00658       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00659     }
00660     else {
00661       /*      Op2D wdp;
00662         rbh.begin();
00663         wdp.numRows = numRows_;
00664         wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00665         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00666         wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00667         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00668         wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00669         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00670         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00671         wdp.damping_factor = dampingFactor_;
00672         wdp.xstride = X.getStride();
00673         wdp.bstride = B.getStride();
00674         rbh.end();
00675         const size_t numRHS = X.getNumCols();
00676         node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00677       */
00678     }
00679     
00680     first_cheby_iteration_=false;
00681     return;
00682   }
00683 
00684 } // namespace Kokkos
00685 
00686 #endif /* KOKKOS_DEFAULTRELAXATION_HPP */