Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultRelaxation.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_DEFAULTRELAXATION_HPP
00030 #define KOKKOS_DEFAULTRELAXATION_HPP
00031 
00032 #include <Teuchos_ArrayRCP.hpp>
00033 #include <Teuchos_DataAccess.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_TypeNameTraits.hpp>
00036 #include <Teuchos_ScalarTraits.hpp>
00037 #include <stdexcept>
00038 
00039 #include "Kokkos_ConfigDefs.hpp"
00040 #include "Kokkos_DefaultKernels.hpp"
00041 #include "Kokkos_CrsMatrix.hpp" 
00042 #include "Kokkos_CrsGraph.hpp" 
00043 #include "Kokkos_MultiVector.hpp"
00044 #include "Kokkos_NodeHelpers.hpp"
00045 #include "Kokkos_DefaultArithmetic.hpp"
00046 #include "Kokkos_DefaultRelaxationKernelOps.hpp"
00047 
00048 #include <stdio.h>
00049 
00050 namespace Kokkos {
00051 
00057   template <class Scalar, class Ordinal, class Node = DefaultNode::DefaultNodeType>
00058   class DefaultRelaxation {
00059   public:
00060     typedef Scalar  ScalarType;
00061     typedef Ordinal OrdinalType;
00062     typedef Node    NodeType;
00063 
00065 
00067 
00069     DefaultRelaxation(const RCP<Node> &node = DefaultNode::getDefaultNode());
00070 
00072     ~DefaultRelaxation();
00073 
00075 
00077 
00078     
00080     RCP<Node> getNode() const;
00081 
00083 
00085 
00087 
00089     template <class GRAPH>
00090     Teuchos::DataAccess initializeStructure(GRAPH &graph, Teuchos::DataAccess cv);
00091 
00093     template <class MATRIX>
00094     Teuchos::DataAccess initializeValues(MATRIX &matrix, Teuchos::DataAccess cv);
00095 
00097     template <class SparseOps>
00098     Teuchos::DataAccess initializeStructure(CrsGraph<Ordinal,Node,SparseOps> &graph, Teuchos::DataAccess cv);
00099 
00101     template <class SparseOps>
00102     Teuchos::DataAccess initializeValues(CrsMatrix<Scalar,Ordinal,Node,SparseOps> &matrix, Teuchos::DataAccess cv);
00103 
00105     void setDiagonal(MultiVector<Scalar,Node> & diag);    
00106 
00108     void clear();
00109 
00111 
00113 
00115 
00117 
00119     void sweep_jacobi(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00120 
00122     void sweep_fine_hybrid(Scalar dampingFactor_, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00123 
00125     void sweep_coarse_hybrid(Scalar dampingFactor_, size_t num_chunks, MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00126 
00128     void setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min);
00129 
00131     void sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const;
00132 
00134 
00135   protected:
00137     DefaultRelaxation(const DefaultRelaxation& source);
00138 
00140     // NTS: This should eventually disappear into some other Kokkos class...
00141     void ExtractDiagonal();    
00142 
00143     // Update the temp vector size
00144     bool UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const;
00145     bool UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const;
00146 
00147     // My node
00148     RCP<Node> node_;
00149 
00150     // we do this one of two ways: 
00151     // 1D/packed: array of offsets, pointer for ordinals, pointer for values. obviously the smallest footprint.
00152     ArrayRCP<const Ordinal> pbuf_inds1D_;
00153     ArrayRCP<const size_t>  pbuf_offsets1D_;
00154     ArrayRCP<const Scalar>  pbuf_vals1D_;
00155     // 2D: array of pointers
00156     ArrayRCP<const Ordinal *> pbuf_inds2D_;
00157     ArrayRCP<const Scalar  *> pbuf_vals2D_;
00158     ArrayRCP<size_t>          pbuf_numEntries_;
00159     
00160     // Array containing matrix diagonal for easy access
00161     ArrayRCP<Scalar> diagonal_;
00162 
00163     // Array containing a temp vector for Jacobi
00164     mutable ArrayRCP<Scalar> tempJacobiVector_;
00165     mutable size_t lastNumJacobiVectors_;
00166 
00167     // Arrays containing temp vectors for Chebyshev
00168     mutable ArrayRCP<Scalar> tempChebyVectorX_;
00169     mutable ArrayRCP<Scalar> tempChebyVectorW_;
00170     mutable size_t lastNumChebyVectors_;
00171     mutable bool cheby_setup_done_;
00172     mutable bool first_cheby_iteration_;
00173 
00174     // Constants for Chebyshev
00175     mutable Scalar lmin_,lmax_,delta_,s1_,oneOverTheta_,rho_,rho_new_,dtemp1_,dtemp2_;
00176 
00177     size_t numRows_;
00178     bool indsInit_, valsInit_, isPacked_, isEmpty_;
00179   };
00180 
00181 
00182   /**********************************************************************/
00183   template<class Scalar, class Ordinal, class Node>
00184   DefaultRelaxation<Scalar,Ordinal,Node>::DefaultRelaxation(const RCP<Node> &node)
00185   : node_(node)
00186   , lastNumJacobiVectors_(0)
00187   , lastNumChebyVectors_(0)
00188   , cheby_setup_done_(false)
00189   , first_cheby_iteration_(false)  
00190   , indsInit_(false)
00191   , valsInit_(false)
00192   , isPacked_(false) 
00193   , isEmpty_(false)
00194   {
00195     lmin_=lmax_=delta_=s1_=oneOverTheta_=rho_=rho_new_=dtemp1_=dtemp2_=Teuchos::ScalarTraits<Scalar>::zero();
00196   }
00197 
00198   /**********************************************************************/
00199   template<class Scalar, class Ordinal, class Node>
00200   DefaultRelaxation<Scalar,Ordinal,Node>::~DefaultRelaxation() {
00201     clear();
00202   }
00203 
00204   /**********************************************************************/
00205   template <class Scalar, class Ordinal, class Node>
00206   template <class GRAPH>
00207   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeStructure(GRAPH &graph, Teuchos::DataAccess cv) {
00208     // not implemented for general sparse graphs
00209     TEST_FOR_EXCEPT(true);
00210   }
00211 
00212   /**********************************************************************/
00213   template <class Scalar, class Ordinal, class Node>
00214   template <class MATRIX>
00215   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeValues(MATRIX &graph, Teuchos::DataAccess cv) {
00216     // not implemented for general sparse matrices
00217     TEST_FOR_EXCEPT(true);
00218   }
00219 
00220   /**********************************************************************/
00221   template <class Scalar, class Ordinal, class Node>
00222   template <class SparseOps>
00223   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeStructure(CrsGraph<Ordinal,Node,SparseOps> &graph, Teuchos::DataAccess cv) {
00224     TEST_FOR_EXCEPTION(cv != Teuchos::View, std::runtime_error,
00225         Teuchos::typeName(*this) << "::initializeStructure(): requires View access.");
00226     TEST_FOR_EXCEPTION(indsInit_ == true, std::runtime_error, 
00227         Teuchos::typeName(*this) << "::initializeStructure(): structure already initialized.");
00228     numRows_ = graph.getNumRows();
00229     if (graph.isEmpty() || numRows_ == 0) {
00230       isEmpty_ = true;
00231     }
00232     else if (graph.isPacked()) {
00233       isEmpty_ = false;
00234       isPacked_ = true;
00235       pbuf_inds1D_    = graph.getPackedIndices();
00236       pbuf_offsets1D_ = graph.getPackedOffsets();
00237     }
00238     else {
00239       isEmpty_ = false;
00240       isPacked_ = false;
00241       pbuf_inds2D_     = node_->template allocBuffer<const Ordinal *>(numRows_);
00242       pbuf_numEntries_ = node_->template allocBuffer<size_t>(numRows_);
00243       ArrayRCP<const Ordinal *> inds2Dview = node_->template viewBufferNonConst<const Ordinal *>(WriteOnly, numRows_, pbuf_inds2D_);
00244       ArrayRCP<         size_t> numEntview = node_->template viewBufferNonConst<         size_t>(WriteOnly, numRows_, pbuf_numEntries_);
00245       for (size_t r=0; r < numRows_; ++r) {
00246         ArrayRCP<const Ordinal> rowinds = graph.get2DIndices(r);
00247         if (rowinds != null) {
00248           inds2Dview[r] = rowinds.getRawPtr();
00249           numEntview[r] = rowinds.size();
00250         }
00251         else {
00252           inds2Dview[r] = NULL;
00253           numEntview[r] = 0;
00254         }
00255       }
00256     }
00257     indsInit_ = true;
00258     return Teuchos::View;
00259   }
00260 
00261   /**********************************************************************/
00262   template <class Scalar, class Ordinal, class Node>
00263   template <class SparseOps>
00264   Teuchos::DataAccess DefaultRelaxation<Scalar,Ordinal,Node>::initializeValues(CrsMatrix<Scalar,Ordinal,Node,SparseOps> &matrix, Teuchos::DataAccess cv) {
00265     TEST_FOR_EXCEPTION(cv != Teuchos::View, std::runtime_error,
00266         Teuchos::typeName(*this) << "::initializeValues(): requires View access.");
00267     TEST_FOR_EXCEPTION(valsInit_ == true, std::runtime_error, 
00268         Teuchos::typeName(*this) << "::initializeValues(): values already initialized.");
00269     TEST_FOR_EXCEPTION(numRows_ != matrix.getNumRows() || (!isEmpty_ && isPacked_ != matrix.isPacked()), std::runtime_error,
00270         Teuchos::typeName(*this) << "::initializeValues(): matrix not compatible with previously supplied graph.");
00271     if (isEmpty_ || matrix.isEmpty() || numRows_ == 0) {
00272       isEmpty_ = true;
00273     }
00274     else if (matrix.isPacked()) {
00275       isEmpty_ = false;
00276       pbuf_vals1D_ = matrix.getPackedValues();
00277     }
00278     else {
00279       isEmpty_ = false;
00280       pbuf_vals2D_ = node_->template allocBuffer<const Scalar *>(numRows_);
00281       ArrayRCP<const Scalar *> vals2Dview = node_->template viewBufferNonConst<const Scalar *>(WriteOnly, numRows_, pbuf_vals2D_);
00282       for (size_t r=0; r < numRows_; ++r) {
00283         ArrayRCP<const Scalar> rowvals = matrix.get2DValues(r);
00284         if (rowvals != null) {
00285           vals2Dview[r] = rowvals.getRawPtr();
00286         }
00287         else {
00288           vals2Dview[r] = NULL;
00289         }
00290       }
00291     }
00292     
00293     valsInit_ = true;
00294     ExtractDiagonal();
00295     return Teuchos::View;
00296   }
00297 
00298   /**********************************************************************/
00299   template <class Scalar, class Ordinal, class Node>
00300   RCP<Node> DefaultRelaxation<Scalar,Ordinal,Node>::getNode() const { 
00301     return node_; 
00302   }
00303 
00304   /**********************************************************************/
00305   template <class Scalar, class Ordinal, class Node>
00306   void DefaultRelaxation<Scalar,Ordinal,Node>::clear() {
00307     pbuf_inds1D_      = null;
00308     pbuf_offsets1D_   = null;
00309     pbuf_vals1D_      = null;
00310     pbuf_inds2D_      = null;
00311     pbuf_vals2D_      = null;
00312     pbuf_numEntries_  = null;
00313     diagonal_         = null;
00314     tempJacobiVector_ = null;
00315     tempChebyVectorW_ = null;
00316     tempChebyVectorX_ = null;
00317     indsInit_ = false;
00318     valsInit_ = false;
00319     isPacked_ = false;
00320     isEmpty_  = false;
00321     lastNumJacobiVectors_=0;
00322     lastNumChebyVectors_=0;
00323   }
00324 
00325   /**********************************************************************/
00326   // Sets the diagonal inverted for relaxation using a MultiVector
00327   template <class Scalar, class Ordinal, class Node>
00328   void DefaultRelaxation<Scalar,Ordinal,Node>::setDiagonal(MultiVector<Scalar,Node> & diag){
00329     // Allocate space for diagonal
00330     diagonal_ = node_->template allocBuffer<Scalar>(numRows_);    
00331 
00332     // NTS: Copy diag over
00333 
00334     // Make it fail for now...
00335     TEST_FOR_EXCEPT(true);
00336   }
00337     
00338   /**********************************************************************/
00339   template <class Scalar, class Ordinal, class Node>
00340   void DefaultRelaxation<Scalar,Ordinal,Node>::ExtractDiagonal(){    
00341     TEST_FOR_EXCEPTION(valsInit_ == false, std::runtime_error, 
00342         Teuchos::typeName(*this) << "::ExtractDiagonal(): initializeValues() hasn't been called.");
00343 
00344     // Allocate space for diagonal
00345     diagonal_ = node_->template allocBuffer<Scalar>(numRows_);    
00346       
00347     if (pbuf_vals1D_ != null){
00348       // Extract the diagonal for Type 1 storage
00349       typedef ExtractDiagonalOp1<Scalar,Ordinal>  Op1D;
00350       ReadyBufferHelper<Node> rbh(node_);
00351       Op1D wdp;
00352       rbh.begin();
00353       wdp.numRows = numRows_;
00354       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00355       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00356       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00357       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00358       rbh.end();
00359       node_->template parallel_for<Op1D>(0,numRows_,wdp);
00360     }
00361     else {
00362       // Extract the diagonal for Type 2 storage
00363       typedef ExtractDiagonalOp2<Scalar,Ordinal>  Op2D;
00364       ReadyBufferHelper<Node> rbh(node_);
00365       Op2D wdp;
00366       rbh.begin();
00367       wdp.numRows = numRows_;
00368       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00369       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00370       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00371       wdp.diag    = rbh.template addNonConstBuffer<Scalar>(diagonal_);
00372       rbh.end();
00373       rbh.end();
00374       node_->template parallel_for<Op2D>(0,numRows_,wdp);
00375       
00376     }
00377   }
00378   /**********************************************************************/
00379   template <class Scalar, class Ordinal, class Node>
00380   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateJacobiTemp(size_t num_vectors, size_t vec_leng) const{
00381     // Re-allocate memory if needed
00382     if(num_vectors > lastNumJacobiVectors_){
00383       tempJacobiVector_= null;
00384       lastNumJacobiVectors_=num_vectors;
00385       tempJacobiVector_ = node_->template allocBuffer<Scalar>(vec_leng*lastNumJacobiVectors_); 
00386       return true;
00387     }
00388     return false;
00389   }
00390 
00391   /**********************************************************************/
00392   template <class Scalar, class Ordinal, class Node>
00393   bool DefaultRelaxation<Scalar,Ordinal,Node>::UpdateChebyTemp(size_t num_vectors, size_t vec_leng) const{
00394     // Re-allocate memory if needed
00395     if(num_vectors > lastNumChebyVectors_){
00396       tempChebyVectorW_= null;
00397       tempChebyVectorX_= null;
00398       lastNumChebyVectors_=num_vectors;
00399       tempChebyVectorW_ = node_->template allocBuffer<Scalar>(numRows_*lastNumChebyVectors_);
00400       tempChebyVectorX_ = node_->template allocBuffer<Scalar>(vec_leng*lastNumChebyVectors_); 
00401       return true;
00402     }
00403     return false;
00404   }
00405 
00406 
00407   /**********************************************************************/
00408   template <class Scalar, class Ordinal, class Node>
00409   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_fine_hybrid(Scalar dampingFactor_,
00410              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00411     typedef DefaultFineGrainHybridGaussSeidelOp1<Scalar,Ordinal>  Op1D;
00412     typedef DefaultFineGrainHybridGaussSeidelOp2<Scalar,Ordinal>  Op2D;
00413 
00414 
00415     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00416         Teuchos::typeName(*this) << "::sweep_fine_hybrid(): operation not fully initialized.");
00417     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00418     ReadyBufferHelper<Node> rbh(node_);
00419     if (isEmpty_ == true) {
00420       // This makes no sense to try to call ...
00421       TEST_FOR_EXCEPT(true);
00422     }
00423     else if (isPacked_ == true) {
00424       Op1D wdp;
00425       rbh.begin();
00426       wdp.numRows = numRows_;
00427       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00428       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00429       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00430       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00431       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00432       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00433       wdp.damping_factor = dampingFactor_;
00434       wdp.xstride = X.getStride();
00435       wdp.bstride = B.getStride();
00436       rbh.end();
00437       const size_t numRHS = X.getNumCols();
00438       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00439     }
00440     else {
00441       Op2D wdp;
00442       rbh.begin();
00443       wdp.numRows = numRows_;
00444       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00445       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00446       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00447       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00448       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00449       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00450       wdp.damping_factor = dampingFactor_;
00451       wdp.xstride = X.getStride();
00452       wdp.bstride = B.getStride();
00453       rbh.end();
00454       const size_t numRHS = X.getNumCols();
00455       node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00456     }
00457     return;
00458   }
00459 
00460   /**********************************************************************/
00461   template <class Scalar, class Ordinal, class Node>
00462   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_coarse_hybrid(Scalar dampingFactor_,size_t num_chunks,
00463              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00464     typedef DefaultCoarseGrainHybridGaussSeidelOp1<Scalar,Ordinal>  Op1D;
00465     typedef DefaultCoarseGrainHybridGaussSeidelOp2<Scalar,Ordinal>  Op2D;
00466 
00467 
00468     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00469         Teuchos::typeName(*this) << "::sweep_coarse_hybrid(): operation not fully initialized.");
00470     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00471     ReadyBufferHelper<Node> rbh(node_);   
00472 
00473     if (isEmpty_ == true) {
00474       // This makes no sense to try to call ...
00475       TEST_FOR_EXCEPT(true);
00476     }
00477     else if (isPacked_ == true) {
00478       Op1D wdp;
00479       rbh.begin();
00480       wdp.numRows = numRows_;
00481       wdp.numChunks = num_chunks;
00482       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00483       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00484       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00485       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00486       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00487       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00488       wdp.damping_factor = dampingFactor_;
00489       wdp.xstride = X.getStride();
00490       wdp.bstride = B.getStride();
00491       rbh.end();
00492       const size_t numRHS = X.getNumCols();
00493       node_->template parallel_for<Op1D>(0,num_chunks*numRHS,wdp);
00494     }
00495     else {
00496       Op2D wdp;
00497       rbh.begin();
00498       wdp.numRows = numRows_;
00499       wdp.numChunks = num_chunks;
00500       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00501       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00502       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00503       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00504       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00505       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00506       wdp.damping_factor = dampingFactor_;
00507       wdp.xstride = X.getStride();
00508       wdp.bstride = B.getStride();
00509       rbh.end();
00510       const size_t numRHS = X.getNumCols();
00511       node_->template parallel_for<Op2D>(0,num_chunks*numRHS,wdp);
00512     }
00513     return;
00514   }
00515 
00516   
00517   /********************************************************************/
00518   template <class Scalar, class Ordinal, class Node>
00519   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_jacobi(Scalar dampingFactor_,
00520              MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00521     typedef DefaultJacobiOp1<Scalar,Ordinal>  Op1D;
00522     typedef DefaultJacobiOp2<Scalar,Ordinal>  Op2D;
00523 
00524 
00525     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00526         Teuchos::typeName(*this) << "::sweep_jacobi(): operation not fully initialized.");
00527     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00528     ReadyBufferHelper<Node> rbh(node_);
00529 
00530     // Copy x over to the temp vector
00531     // NTS: The MultiVector copy constructor is a View. We need to do this the hard way.
00532     MultiVector<Scalar,Node> X0(X.getNode());
00533     UpdateJacobiTemp(X.getNumCols(),X.getNumRows());
00534     X0.initializeValues(X.getNumRows(),B.getNumCols(),tempJacobiVector_,B.getStride());
00535     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00536 
00537     if (isEmpty_ == true) {
00538       // This makes no sense to try to call ...
00539       TEST_FOR_EXCEPT(true);
00540     }
00541     else if (isPacked_ == true) {
00542       Op1D wdp;
00543       rbh.begin();
00544       wdp.numRows = numRows_;
00545       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00546       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00547       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00548       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00549       wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00550       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00551       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00552       wdp.damping_factor = dampingFactor_;
00553       wdp.xstride = X.getStride();
00554       wdp.bstride = B.getStride();
00555       rbh.end();
00556       const size_t numRHS = X.getNumCols();
00557       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00558     }
00559     else {
00560       Op2D wdp;
00561       rbh.begin();
00562       wdp.numRows = numRows_;
00563       wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00564       wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00565       wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00566       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00567       wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00568       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00569       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00570       wdp.damping_factor = dampingFactor_;
00571       wdp.xstride = X.getStride();
00572       wdp.bstride = B.getStride();
00573       rbh.end();
00574       const size_t numRHS = X.getNumCols();
00575       node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00576     }
00577     return;
00578   }
00579 
00580   /********************************************************************/
00581   template <class Scalar, class Ordinal, class Node>
00582   void DefaultRelaxation<Scalar,Ordinal,Node>::setup_chebyshev(const Scalar lambda_max, const Scalar lambda_min){
00583     lmax_=lambda_max;
00584     lmin_=lambda_min;
00585 
00586     Scalar alpha = lmin_;
00587     Scalar beta  = 1.1*lmax_;
00588     delta_ = 2.0 / (beta-alpha);
00589     Scalar theta_ = 0.5 * (beta+alpha);
00590     s1_    = theta_ * delta_;
00591     oneOverTheta_=1.0 / theta_;
00592     rho_=1.0/s1_;
00593 
00594 
00595     first_cheby_iteration_=true;
00596     cheby_setup_done_=true;
00597   }
00598 
00599 
00600   /********************************************************************/
00601   template <class Scalar, class Ordinal, class Node>
00602   void DefaultRelaxation<Scalar,Ordinal,Node>::sweep_chebyshev(MultiVector<Scalar,Node> &X, const MultiVector<Scalar,Node> &B) const{
00603     typedef DefaultChebyshevOp1<Scalar,Ordinal>  Op1D;
00604     //    typedef DefaultChebyOp2<Scalar,Ordinal>  Op2D;
00605     
00606     TEST_FOR_EXCEPTION(indsInit_ == false || valsInit_ == false, std::runtime_error,
00607            Teuchos::typeName(*this) << "::sweep_jacobi(): operation not fully initialized.");
00608     TEST_FOR_EXCEPT(X.getNumCols() != B.getNumCols());
00609     //size_t xstride = X.getStride();
00610     //size_t bstride = B.getStride();
00611     //TEST_FOR_EXCEPT(xstride != bstride); //TODO JJH don't think we want this b/c X is imported, B is local
00612     TEST_FOR_EXCEPT(cheby_setup_done_ == false);
00613     
00614     ReadyBufferHelper<Node> rbh(node_);
00615     
00616     // If the number of multivectors has changed, we need to run Cheby from scratch
00617     first_cheby_iteration_= UpdateChebyTemp(X.getNumCols(),X.getNumRows()) || first_cheby_iteration_;
00618     
00619     // Copy X to X0 so we get the matvec correct
00620     MultiVector<Scalar,Node> X0(X.getNode());
00621     X0.initializeValues(X.getNumRows(),X.getNumCols(),tempChebyVectorX_,X.getStride());
00622     DefaultArithmetic<MultiVector<Scalar,Node> >::Assign(X0,X);
00623 
00624     // Update Scalars 
00625     if(!first_cheby_iteration_){
00626       rho_new_ = 1.0 / (2.0*s1_ - rho_);
00627       dtemp1_=rho_*rho_new_;
00628       dtemp2_=2*rho_new_*delta_;
00629       rho_=rho_new_;
00630     }    
00631     //    printf("    rho = %6.4e rho_new = %6.4e dtemp1=%6.4e dtemp2=%6.4e\n",rho_,rho_new_,dtemp1_,dtemp2_);
00632 
00633 
00634     
00635     if (isEmpty_ == true) {
00636       // This makes no sense to try to call ...
00637       TEST_FOR_EXCEPT(true);
00638     }
00639     else if (isPacked_ == true) {
00640       Op1D wdp;
00641       rbh.begin();
00642       wdp.numRows = numRows_;
00643       wdp.offsets = rbh.template addConstBuffer<size_t>(pbuf_offsets1D_);
00644       wdp.inds    = rbh.template addConstBuffer<Ordinal>(pbuf_inds1D_);
00645       wdp.vals    = rbh.template addConstBuffer<Scalar>(pbuf_vals1D_);
00646       wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00647       wdp.x0      = rbh.template addConstBuffer<Scalar>(tempChebyVectorX_);
00648       wdp.w       = rbh.template addNonConstBuffer<Scalar>(tempChebyVectorW_);
00649       wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00650       wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00651       wdp.stride = X.getStride();
00652       wdp.first_step=first_cheby_iteration_;
00653       wdp.zero_initial_guess=false;//HAQ Fix me later
00654       wdp.oneOverTheta = oneOverTheta_;
00655       wdp.dtemp1 = dtemp1_;
00656       wdp.dtemp2 = dtemp2_;
00657       
00658       rbh.end();    
00659       const size_t numRHS = X.getNumCols();
00660       node_->template parallel_for<Op1D>(0,numRows_*numRHS,wdp);
00661     }
00662     else {
00663       /*      Op2D wdp;
00664         rbh.begin();
00665         wdp.numRows = numRows_;
00666         wdp.numEntries = rbh.template addConstBuffer<size_t>(pbuf_numEntries_);
00667         wdp.inds_beg   = rbh.template addConstBuffer<const Ordinal *>(pbuf_inds2D_);
00668         wdp.vals_beg   = rbh.template addConstBuffer<const Scalar *>(pbuf_vals2D_);
00669         wdp.x       = rbh.template addNonConstBuffer<Scalar>(X.getValuesNonConst());
00670         wdp.x0      = rbh.template addConstBuffer<Scalar>(X0.getValues());
00671         wdp.b       = rbh.template addConstBuffer<Scalar>(B.getValues());
00672         wdp.diag    = rbh.template addConstBuffer<Scalar>(diagonal_);
00673         wdp.damping_factor = dampingFactor_;
00674         wdp.xstride = X.getStride();
00675         wdp.bstride = B.getStride();
00676         rbh.end();
00677         const size_t numRHS = X.getNumCols();
00678         node_->template parallel_for<Op2D>(0,numRows_*numRHS,wdp);
00679       */
00680     }
00681     
00682     first_cheby_iteration_=false;
00683     return;
00684   }
00685 
00686 } // namespace Kokkos
00687 
00688 #endif /* KOKKOS_DEFAULTRELAXATION_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends