Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_DefaultSparseOps.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_DEFAULTSPARSEOPS_HPP
00043 #define KOKKOS_DEFAULTSPARSEOPS_HPP
00044 
00045 #include <Teuchos_CompileTimeAssert.hpp>
00046 #include <Teuchos_DataAccess.hpp>
00047 #include <Teuchos_Describable.hpp>
00048 #include <iterator>
00049 #include <Teuchos_OrdinalTraits.hpp>
00050 #include <Teuchos_SerialDenseMatrix.hpp>
00051 #include <stdexcept>
00052 
00053 #include "Kokkos_ConfigDefs.hpp"
00054 #include "Kokkos_CrsMatrixBase.hpp"
00055 #include "Kokkos_CrsGraphBase.hpp"
00056 
00057 #include "Kokkos_MultiVector.hpp"
00058 #include "Kokkos_NodeHelpers.hpp"
00059 #include "Kokkos_DefaultArithmetic.hpp"
00060 #include "Kokkos_DefaultSparseSolveKernelOps.hpp"
00061 #include "Kokkos_DefaultSparseMultiplyKernelOps.hpp"
00062 
00063 namespace Kokkos {
00064 
00065   namespace details {
00066 
00067     template <class O>
00068     struct rowPtrsInitKernel {
00069       O *rowPtrs;
00070       inline KERNEL_PREFIX void execute(int i) const {
00071         rowPtrs[i] = 0;
00072       }
00073     };
00074 
00075     template <class O, class T>
00076     struct valsInitKernel {
00077       const O *rowPtrs;
00078       T * entries;
00079       inline KERNEL_PREFIX void execute(int i) const {
00080         T *beg = entries+rowPtrs[i],
00081           *end = entries+rowPtrs[i+1];
00082         while (beg != end) {
00083           *beg++ = Teuchos::ScalarTraits<T>::zero();
00084         }
00085       }
00086     };
00087 
00088     class FirstTouchCRSAllocator {
00089       public:
00091       template <class Ordinal, class Node>
00092       static ArrayRCP<Ordinal> allocRowPtrs(const RCP<Node> &node, const ArrayView<const Ordinal> &numEntriesPerRow)
00093       {
00094         const Ordinal numrows = numEntriesPerRow.size();
00095         details::rowPtrsInitKernel<Ordinal> kern;
00096         // allocate
00097         kern.rowPtrs = new Ordinal[numrows+1];
00098         // parallel first touch
00099         node->parallel_for(0,numrows+1,kern);
00100         // encapsulate
00101         ArrayRCP<Ordinal> ptrs = arcp<Ordinal>(kern.rowPtrs,0,numrows+1,true);
00102         // compute in serial. parallelize later, perhaps; it's only O(N)
00103         ptrs[0] = 0;
00104         std::partial_sum( numEntriesPerRow.getRawPtr(), numEntriesPerRow.getRawPtr()+numEntriesPerRow.size(), ptrs.begin()+1 );
00105         return ptrs;
00106       }
00107 
00109       template <class T, class Ordinal, class Node>
00110       static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const Ordinal> &rowPtrs)
00111       {
00112         const Ordinal totalNumEntries = *(rowPtrs.end()-1);
00113         const Ordinal numRows = rowPtrs.size() - 1;
00114         details::valsInitKernel<Ordinal,T> kern;
00115         ArrayRCP<T> vals;
00116         if (totalNumEntries > 0) {
00117           // allocate
00118           kern.entries = new T[totalNumEntries];
00119           kern.rowPtrs = rowPtrs.getRawPtr();
00120           // first touch
00121           node->parallel_for(0,numRows,kern);
00122           // encapsulate
00123           vals = arcp<T>(kern.entries,0,totalNumEntries,true);
00124         }
00125         return vals;
00126       }
00127     };
00128 
00129     class DefaultCRSAllocator {
00130       public:
00132       template <class Ordinal, class Node>
00133       static ArrayRCP<Ordinal> allocRowPtrs(const RCP<Node> &node, const ArrayView<const Ordinal> &numEntriesPerRow)
00134       {
00135         ArrayRCP<Ordinal> ptrs = arcp<Ordinal>( numEntriesPerRow.size() + 1 );
00136         ptrs[0] = 0;
00137         std::partial_sum( numEntriesPerRow.getRawPtr(), numEntriesPerRow.getRawPtr()+numEntriesPerRow.size(), ptrs.begin()+1 );
00138         return ptrs;
00139       }
00140 
00142       template <class T, class Ordinal, class Node>
00143       static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs)
00144       {
00145         const Ordinal totalNumEntries = *(rowPtrs.end()-1);
00146         // alloc data
00147         ArrayRCP<T> vals;
00148         if (totalNumEntries > 0) vals = arcp<T>(totalNumEntries);
00149         std::fill( vals.begin(), vals.end(), Teuchos::ScalarTraits<T>::zero() );
00150         return vals;
00151       }
00152     };
00153 
00154   }
00155 
00157 
00159   template <class Ordinal,
00160             class Node>
00161   class DefaultCrsGraph : public CrsGraphBase<Ordinal,Node>
00162   {
00163     public:
00164       DefaultCrsGraph(Ordinal numRows, Ordinal numCols, const RCP<Node> &node, const RCP<ParameterList> &params);
00165       bool isEmpty() const;
00166       void setStructure(const ArrayRCP<const size_t> &ptrs,
00167                         const ArrayRCP<const Ordinal> &inds);
00168       inline ArrayRCP<const size_t>  getPointers() const;
00169       inline void setSmallPointers(const ArrayRCP<const Ordinal> &ptrs);
00170       inline ArrayRCP<const Ordinal> getSmallPointers() const;
00171       inline ArrayRCP<const Ordinal> getIndices() const;
00172       inline bool isInitialized() const;
00173       inline void setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag);
00174       inline void getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const;
00175     private:
00176       ArrayRCP<const  size_t> big_ptrs_;
00177       ArrayRCP<const Ordinal> sml_ptrs_;
00178       ArrayRCP<const Ordinal> inds_;
00179       bool isInitialized_;
00180       bool isEmpty_;
00181       Teuchos::EUplo  tri_uplo_;
00182       Teuchos::EDiag unit_diag_;
00183   };
00184 
00186 
00188   template <class Scalar,
00189             class Ordinal,
00190             class Node>
00191   class DefaultCrsMatrix : public CrsMatrixBase<Scalar,Ordinal,Node>
00192   {
00193     public:
00194       DefaultCrsMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params);
00195       void setValues(const ArrayRCP<const Scalar> &vals);
00196       inline ArrayRCP<const Scalar> getValues() const;
00197       inline bool isInitialized() const;
00198     private:
00199       ArrayRCP<const Scalar> vals_;
00200       bool isInitialized_;
00201   };
00202 
00203   template <class Ordinal, class Node>
00204   DefaultCrsGraph<Ordinal,Node>::DefaultCrsGraph(Ordinal numRows, Ordinal numCols, const RCP<Node> &node, const RCP<ParameterList> &params)
00205   : CrsGraphBase<Ordinal,Node>(numRows,numCols,node,params)
00206   , isInitialized_(false)
00207   , isEmpty_(false)
00208   {
00209     // Make sure that users only specialize for Kokkos Node types that are host Nodes (vs. device Nodes, such as GPU Nodes)
00210     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00211   }
00212 
00213   template <class Ordinal, class Node>
00214   bool DefaultCrsGraph<Ordinal,Node>::isEmpty() const
00215   { return isEmpty_; }
00216 
00217   template <class Ordinal, class Node>
00218   void DefaultCrsGraph<Ordinal,Node>::setStructure(
00219                       const ArrayRCP<const size_t>  &ptrs,
00220                       const ArrayRCP<const Ordinal> &inds)
00221   {
00222     std::string tfecfFuncName("setStructure(ptrs,inds)");
00223     const Ordinal numrows = this->getNumRows();
00224 
00225     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00226       ptrs.is_null (),
00227       std::runtime_error,
00228       ": The input array 'ptrs' must be nonnull, even for a matrix with zero "
00229       "rows.");
00230 
00231     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00232       (size_t) ptrs.size() != (size_t) numrows+1,
00233       std::runtime_error,
00234       ": Graph input data are not coherent:\n"
00235       "-- ptrs.size() = " << ptrs.size() << " != numrows+1 = "
00236       << (numrows+1) << ".");
00237 
00238     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00239       ptrs[0] != 0,
00240       std::runtime_error,
00241       ": Graph input data are not coherent:\n"
00242       "-- ptrs[0] = " << ptrs[0] << " != 0.");
00243 
00244     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00245       (size_t) inds.size() != (size_t) ptrs[numrows],
00246       std::runtime_error,
00247       ": Graph input data are not coherent:\n"
00248       "-- inds.size() = " << inds.size() << " != ptrs[numrows="
00249       << numrows << "] = " << ptrs[numrows] << ".");
00250 
00251     const Ordinal numEntries = ptrs[numrows];
00252     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00253       isInitialized_,
00254       std::runtime_error,
00255       " matrix has already been initialized."
00256     )
00257     if (numrows == 0 || numEntries == 0) isEmpty_ = true;
00258     big_ptrs_ = ptrs;
00259     inds_ = inds;
00260     isInitialized_ = true;
00261   }
00262 
00263   template <class Ordinal, class Node>
00264   ArrayRCP<const size_t> DefaultCrsGraph<Ordinal,Node>::getPointers() const
00265   { return big_ptrs_; }
00266 
00267   template <class Ordinal, class Node>
00268   ArrayRCP<const Ordinal> DefaultCrsGraph<Ordinal,Node>::getSmallPointers() const
00269   { return sml_ptrs_; }
00270 
00271   template <class Ordinal, class Node>
00272   void DefaultCrsGraph<Ordinal,Node>::setSmallPointers(const ArrayRCP<const Ordinal> &ptrs)
00273   {
00274     sml_ptrs_ = ptrs;
00275     big_ptrs_ = null;
00276   }
00277 
00278   template <class Ordinal, class Node>
00279   ArrayRCP<const Ordinal> DefaultCrsGraph<Ordinal,Node>::getIndices() const
00280   { return inds_; }
00281 
00282   template <class Ordinal, class Node>
00283   bool DefaultCrsGraph<Ordinal,Node>::isInitialized() const
00284   { return isInitialized_; }
00285 
00286   template <class Ordinal, class Node>
00287   void DefaultCrsGraph<Ordinal,Node>::setMatDesc(Teuchos::EUplo uplo, Teuchos::EDiag diag)
00288   {
00289     tri_uplo_ = uplo;
00290     unit_diag_ = diag;
00291   }
00292 
00293   template <class Ordinal, class Node>
00294   void DefaultCrsGraph<Ordinal,Node>::getMatDesc(Teuchos::EUplo &uplo, Teuchos::EDiag &diag) const
00295   {
00296     uplo = tri_uplo_;
00297     diag = unit_diag_;
00298   }
00299 
00300   template <class Scalar, class Ordinal, class Node>
00301   DefaultCrsMatrix<Scalar,Ordinal,Node>::DefaultCrsMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph, const RCP<ParameterList> &params)
00302   : CrsMatrixBase<Scalar,Ordinal,Node>(graph,params)
00303   , isInitialized_(false)
00304   {
00305     // Make sure that users only specialize for Kokkos Node types that are host Nodes (vs. device Nodes, such as GPU Nodes)
00306     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00307   }
00308 
00309   template <class Scalar, class Ordinal, class Node>
00310   void DefaultCrsMatrix<Scalar,Ordinal,Node>::setValues(const ArrayRCP<const Scalar> &vals)
00311   {
00312     std::string tfecfFuncName("setValues(vals)");
00313     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00314         isInitialized_ == true,
00315         std::runtime_error, " matrix is already initialized."
00316     )
00317     vals_ = vals;
00318     isInitialized_ = true;
00319   }
00320 
00321   template <class Scalar, class Ordinal, class Node>
00322   ArrayRCP<const Scalar> DefaultCrsMatrix<Scalar,Ordinal,Node>::getValues() const
00323   { return vals_; }
00324 
00325   template <class Scalar, class Ordinal, class Node>
00326   bool DefaultCrsMatrix<Scalar,Ordinal,Node>::isInitialized() const
00327   {
00328     return isInitialized_;
00329   }
00330 
00339   template <class Scalar, class Ordinal, class Node, class Allocator = details::DefaultCRSAllocator>
00340   class DefaultHostSparseOps : public Teuchos::Describable {
00341   public:
00343 
00344 
00346     typedef Scalar  scalar_type;
00348     typedef Ordinal ordinal_type;
00350     typedef Node    node_type;
00352     typedef DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator> sparse_ops_type;
00353 
00355     template <class O, class N>
00356     struct graph {
00357       typedef DefaultCrsGraph<O,N> graph_type;
00358     };
00359 
00361     template <class S, class O, class N>
00362     struct matrix {
00363       typedef DefaultCrsMatrix<S,O,N> matrix_type;
00364     };
00365 
00386     template <class S2>
00387     struct bind_scalar {
00388       typedef DefaultHostSparseOps<S2,Ordinal,Node,Allocator> other_type;
00389     };
00390 
00408     template <class O2>
00409     struct bind_ordinal {
00410       typedef DefaultHostSparseOps<Scalar,O2,Node,Allocator> other_type;
00411     };
00412 
00414 
00415 
00416 
00418     DefaultHostSparseOps(const RCP<Node> &node);
00419 
00421     DefaultHostSparseOps(const RCP<Node> &node, Teuchos::ParameterList& params);
00422 
00424     ~DefaultHostSparseOps();
00425 
00427 
00428 
00429 
00431     std::string description () const {
00432       using Teuchos::TypeNameTraits;
00433       std::ostringstream os;
00434       os <<  "Kokkos::DefaultHostSparseOps<"
00435          << "Scalar=" << TypeNameTraits<Scalar>::name()
00436          << ", Ordinal=" << TypeNameTraits<Ordinal>::name()
00437          << ", Node=" << TypeNameTraits<Node>::name()
00438          << ">";
00439       return os.str();
00440     }
00441 
00443     void
00444     describe (Teuchos::FancyOStream& out,
00445               const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
00446     {
00447       using Teuchos::EVerbosityLevel;
00448       using Teuchos::includesVerbLevel;
00449       using Teuchos::OSTab;
00450       using Teuchos::rcpFromRef;
00451       using Teuchos::VERB_DEFAULT;
00452       using Teuchos::VERB_NONE;
00453       using Teuchos::VERB_LOW;
00454       using Teuchos::VERB_MEDIUM;
00455       using Teuchos::VERB_HIGH;
00456       using Teuchos::VERB_EXTREME;
00457       using std::endl;
00458 
00459       // Interpret the default verbosity level as VERB_MEDIUM.
00460       const EVerbosityLevel vl =
00461         (verbLevel == VERB_DEFAULT) ? VERB_MEDIUM : verbLevel;
00462 
00463       if (vl == VERB_NONE) {
00464         return;
00465       }
00466       else if (includesVerbLevel (vl, VERB_LOW)) { // vl >= VERB_LOW
00467         out << this->description();
00468 
00469         if (includesVerbLevel (vl, VERB_MEDIUM)) { // vl >= VERB_MEDIUM
00470           out << ":" << endl;
00471           OSTab tab1 (rcpFromRef (out));
00472 
00473           out << "isInitialized_ = " << isInitialized_ << endl;
00474           if (isInitialized_) {
00475             std::string triUplo ("INVALID");
00476             if (tri_uplo_ == Teuchos::UNDEF_TRI) {
00477               triUplo = "UNDEF_TRI";
00478             }
00479             else if (tri_uplo_ == Teuchos::LOWER_TRI) {
00480               triUplo = "LOWER_TRI";
00481             }
00482             else if (tri_uplo_ == Teuchos::UPPER_TRI) {
00483               triUplo = "UPPER_TRI";
00484             }
00485             std::string unitDiag ("INVALID");
00486             if (unit_diag_ == Teuchos::NON_UNIT_DIAG) {
00487               unitDiag = "NON_UNIT_DIAG";
00488             }
00489             else if (unit_diag_ == Teuchos::UNIT_DIAG) {
00490               unitDiag = "UNIT_DIAG";
00491             }
00492 
00493             out << "numRows_ = " << numRows_ << endl
00494                 << "numCols_ = " << numCols_ << endl
00495                 << "isEmpty_ = " << isEmpty_ << endl
00496                 << "tri_uplo_ = " << triUplo << endl
00497                 << "unit_diag_ = " << unitDiag << endl;
00498             if (big_ptrs_.size() > 0) {
00499               out << "numEntries = " << big_ptrs_[big_ptrs_.size()-1] << endl;
00500             }
00501             else if (sml_ptrs_.size() > 0) {
00502               out << "numEntries = " << sml_ptrs_[sml_ptrs_.size()-1] << endl;
00503             }
00504             else {
00505               out << "numEntries = 0" << endl;
00506             }
00507 
00508             if (includesVerbLevel (vl, VERB_EXTREME)) { // vl >= VERB_EXTREME
00509               // Only print out all the sparse matrix's data in
00510               // extreme verbosity mode.
00511               out << "ptrs = [";
00512               if (big_ptrs_.size() > 0) {
00513                 std::copy (big_ptrs_.begin(), big_ptrs_.end(),
00514                            std::ostream_iterator<Ordinal> (out, " "));
00515               }
00516               else {
00517                 std::copy (sml_ptrs_.begin(), sml_ptrs_.end(),
00518                            std::ostream_iterator<Ordinal> (out, " "));
00519               }
00520               out << "]" << endl << "inds_ = [";
00521               std::copy (inds_.begin(), inds_.end(),
00522                          std::ostream_iterator<Ordinal> (out, " "));
00523               out << "]" << endl << "vals_ = [";
00524               std::copy (vals_.begin(), vals_.end(),
00525                          std::ostream_iterator<Scalar> (out, " "));
00526               out << "]" << endl;
00527             } // vl >= VERB_EXTREME
00528           } // if is initialized
00529         } // vl >= VERB_MEDIUM
00530       } // vl >= VERB_LOW
00531     }
00532 
00538     Teuchos::RCP<Teuchos::SerialDenseMatrix<int, scalar_type> >
00539     asDenseMatrix () const
00540     {
00541       using Teuchos::ArrayRCP;
00542       using Teuchos::RCP;
00543       using Teuchos::rcp;
00544       typedef Teuchos::OrdinalTraits<ordinal_type> OTO;
00545       typedef Teuchos::ScalarTraits<scalar_type> STS;
00546       typedef Teuchos::SerialDenseMatrix<int, scalar_type> dense_matrix_type;
00547 
00548       RCP<dense_matrix_type> A_ptr =
00549         rcp (new dense_matrix_type (numRows_, numCols_));
00550       dense_matrix_type& A = *A_ptr; // for notational convenience
00551 
00552       if (big_ptrs_.size() > 0) {
00553         ArrayRCP<const size_t> ptr = big_ptrs_;
00554         for (ordinal_type i = OTO::zero(); i < numRows_; ++i) {
00555           for (size_t k = ptr[i]; k < ptr[i+1]; ++k) {
00556             const ordinal_type j = inds_[k];
00557             const scalar_type A_ij = vals_[k];
00558             A(i,j) += A_ij;
00559           }
00560           if (unit_diag_ == Teuchos::UNIT_DIAG) {
00561             // Respect whatever is in the sparse matrix, even if it is wrong.
00562             // This is helpful for debugging.
00563             A(i,i) += STS::one ();
00564           }
00565         }
00566       }
00567       else if (sml_ptrs_.size() > 0) {
00568         ArrayRCP<const ordinal_type> ptr = sml_ptrs_;
00569         for (ordinal_type i = OTO::zero(); i < numRows_; ++i) {
00570           for (ordinal_type k = ptr[i]; k < ptr[i+1]; ++k) {
00571             const ordinal_type j = inds_[k];
00572             const scalar_type A_ij = vals_[k];
00573             A(i,j) += A_ij;
00574           }
00575           if (unit_diag_ == Teuchos::UNIT_DIAG) {
00576             // Respect whatever is in the sparse matrix, even if it is wrong.
00577             // This is helpful for debugging.
00578             A(i,i) += STS::one ();
00579           }
00580         }
00581       }
00582       else {
00583         TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Kokkos::DefaultHost"
00584           "SparseOps::asDenseMatrix: both big_ptrs_ and sml_ptrs_ are empty.  "
00585           "Please report this bug to the Kokkos developers.");
00586       }
00587       return A_ptr;
00588     }
00589 
00591 
00592 
00593 
00595     RCP<Node> getNode() const;
00596 
00598 
00599 
00600 
00602     static ArrayRCP<size_t> allocRowPtrs(const RCP<Node> &node, const ArrayView<const size_t> &numEntriesPerRow)
00603     {
00604       return Allocator::allocRowPtrs(node,numEntriesPerRow);
00605     }
00606 
00608     template <class T>
00609     static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs)
00610     {
00611       return Allocator::template allocStorage<T,size_t>(node,rowPtrs);
00612     }
00613 
00615     static void finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params);
00616 
00618     static void finalizeMatrix(const DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00619 
00621     static void finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params);
00622 
00631     void setGraphAndMatrix(const RCP<const DefaultCrsGraph<Ordinal,Node> > &graph,
00632                            const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &matrix);
00633 
00635 
00636 
00637 
00668     template <class DomainScalar, class RangeScalar>
00669     void
00670     multiply (Teuchos::ETransp trans,
00671               RangeScalar alpha,
00672               const MultiVector<DomainScalar,Node> &X,
00673               MultiVector<RangeScalar,Node> &Y) const;
00674 
00709     template <class DomainScalar, class RangeScalar>
00710     void
00711     multiply (Teuchos::ETransp trans,
00712               RangeScalar alpha,
00713               const MultiVector<DomainScalar,Node> &X,
00714               RangeScalar beta,
00715               MultiVector<RangeScalar,Node> &Y) const;
00716 
00737     template <class DomainScalar, class RangeScalar>
00738     void
00739     solve (Teuchos::ETransp trans,
00740            const MultiVector<DomainScalar,Node> &Y,
00741            MultiVector<RangeScalar,Node> &X) const;
00742 
00744 
00745   protected:
00747     DefaultHostSparseOps(const DefaultHostSparseOps& source);
00748 
00749     void getOffsets(ArrayRCP<const size_t> &ptrs) const {
00750       ptrs = big_ptrs_;
00751     }
00752 
00753     void getOffsets(ArrayRCP<const Ordinal> &ptrs) const {
00754       ptrs = sml_ptrs_;
00755     }
00756 
00757     template <class DomainScalar, class RangeScalar, class OffsetType>
00758     void solvePrivate(Teuchos::ETransp trans,
00759                       const MultiVector<DomainScalar,Node> &Y,
00760                             MultiVector< RangeScalar,Node> &X) const;
00761 
00762     template <class DomainScalar, class RangeScalar, class OffsetType>
00763     void multiplyPrivate(Teuchos::ETransp trans,
00764                          RangeScalar alpha,
00765                          const MultiVector<DomainScalar,Node> &X,
00766                                MultiVector<RangeScalar,Node> &Y) const;
00767 
00768     template <class DomainScalar, class RangeScalar, class OffsetType>
00769     void multiplyPrivate(Teuchos::ETransp trans,
00770                          RangeScalar alpha,
00771                          const MultiVector<DomainScalar,Node> &X,
00772                          RangeScalar beta,
00773                          MultiVector<RangeScalar,Node> &Y) const;
00774 
00776     RCP<Node> node_;
00777 
00778     // packed CRS: array of row pointers, array of indices, array of values
00779     // pointers are EITHER size_t or Ordinal; one will be null
00780     ArrayRCP<const Ordinal> inds_;
00781     ArrayRCP<const size_t>  big_ptrs_;
00782     ArrayRCP<const Ordinal> sml_ptrs_;
00783     ArrayRCP<const Scalar>  vals_;
00784 
00785     Teuchos::EUplo  tri_uplo_;
00786     Teuchos::EDiag unit_diag_;
00787 
00788     Ordinal numRows_;
00789     Ordinal numCols_;
00790     bool isInitialized_;
00791     bool isEmpty_;
00792   };
00793 
00794 
00795   template <class Scalar, class Ordinal, class Node, class Allocator>
00796   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params)
00797   {
00798     graph.setMatDesc(uplo,diag);
00799     std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeGraph(graph,params)");
00800     TEUCHOS_TEST_FOR_EXCEPTION(
00801         graph.isInitialized() == false,
00802         std::runtime_error, FuncName << ": graph has not yet been initialized."
00803     )
00804     // determine how many non-zeros, so that we can decide whether to reduce the offset pointer type
00805     ArrayRCP<const size_t> bigptrs = graph.getPointers();
00806     const size_t numrows = bigptrs.size() - 1,
00807                    numnz = bigptrs[numrows];
00808     if (numnz < (size_t)Teuchos::OrdinalTraits<Ordinal>::max()) {
00809       ArrayRCP<Ordinal> smallptrs = arcp<Ordinal>(numrows+1);
00810       std::copy( bigptrs.begin(), bigptrs.end(), smallptrs.begin() );
00811       graph.setSmallPointers(smallptrs);
00812     }
00813   }
00814 
00815   template <class Scalar, class Ordinal, class Node, class Allocator>
00816   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::finalizeMatrix(const DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params)
00817   {
00818     // nothing much to do here
00819     std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeMatrix(graph,matrix,params)");
00820     TEUCHOS_TEST_FOR_EXCEPTION(
00821         matrix.isInitialized() == false,
00822         std::runtime_error, FuncName << ": matrix has not yet been initialized."
00823     )
00824   }
00825 
00826   template <class Scalar, class Ordinal, class Node, class Allocator>
00827   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::finalizeGraphAndMatrix(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, DefaultCrsMatrix<Scalar,Ordinal,Node> &matrix, const RCP<ParameterList> &params)
00828   {
00829     // finalize them individually; no benefit to doing them together
00830     finalizeGraph(uplo,diag,graph,params);
00831     finalizeMatrix(graph,matrix,params);
00832   }
00833 
00834 
00835   template<class Scalar, class Ordinal, class Node, class Allocator>
00836   DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::DefaultHostSparseOps(const RCP<Node> &node)
00837   : node_(node)
00838   , numRows_(0)
00839   , numCols_(0)
00840   , isInitialized_(false)
00841   , isEmpty_(false)
00842   {
00843     // Make sure that users only specialize DefaultHostSparseOps for
00844     // Kokkos Node types that are host Nodes (vs. device Nodes, such
00845     // as GPU Nodes).
00846     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00847   }
00848 
00849   template<class Scalar, class Ordinal, class Node, class Allocator>
00850   DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::
00851   DefaultHostSparseOps (const RCP<Node> &node, Teuchos::ParameterList& params)
00852   : node_(node)
00853   , numRows_(0)
00854   , numCols_(0)
00855   , isInitialized_(false)
00856   , isEmpty_(false)
00857   {
00858     (void) params; // Not using this yet.
00859 
00860     // Make sure that users only specialize DefaultHostSparseOps for
00861     // Kokkos Node types that are host Nodes (vs. device Nodes, such
00862     // as GPU Nodes).
00863     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00864   }
00865 
00866   template<class Scalar, class Ordinal, class Node, class Allocator>
00867   DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::~DefaultHostSparseOps() {
00868   }
00869 
00870   template <class Scalar, class Ordinal, class Node, class Allocator>
00871   RCP<Node> DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::getNode() const {
00872     return node_;
00873   }
00874 
00875   template <class Scalar, class Ordinal, class Node, class Allocator>
00876   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::setGraphAndMatrix(
00877               const RCP<const DefaultCrsGraph<Ordinal,Node> > &opgraph,
00878               const RCP<const DefaultCrsMatrix<Scalar,Ordinal,Node> > &opmatrix)
00879   {
00880     std::string tfecfFuncName("setGraphAndMatrix(uplo,diag,graph,matrix)");
00881     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00882         isInitialized_ == true,
00883         std::runtime_error, " operators already initialized.");
00884     numRows_ = opgraph->getNumRows();
00885     numCols_ = opgraph->getNumCols();
00886     if (opgraph->isEmpty() || numRows_ == 0 || numCols_ == 0) {
00887       isEmpty_ = true;
00888     }
00889     else {
00890       isEmpty_ = false;
00891       big_ptrs_ = opgraph->getPointers();
00892       sml_ptrs_ = opgraph->getSmallPointers();
00893       inds_ = opgraph->getIndices();
00894       vals_ = opmatrix->getValues();
00895       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00896         big_ptrs_ != null && sml_ptrs_ != null,
00897         std::logic_error, " Internal logic error: graph has small and big pointers. Please notify Kokkos team."
00898       )
00899       const size_t lenptrs = (big_ptrs_ != null ? big_ptrs_.size() : sml_ptrs_.size());
00900       // these checks just about the most that we can perform
00901       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00902           lenptrs != (size_t)numRows_+1 || inds_.size() != vals_.size(),
00903           std::runtime_error, " matrix and graph seem incongruent.");
00904     }
00905     opgraph->getMatDesc( tri_uplo_, unit_diag_ );
00906     isInitialized_ = true;
00907   }
00908 
00909 
00910   template <class Scalar, class Ordinal, class Node, class Allocator>
00911   template <class DomainScalar, class RangeScalar, class OffsetType>
00912   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::solvePrivate(Teuchos::ETransp trans,
00913                                                         const MultiVector<DomainScalar,Node> &Y,
00914                                                               MultiVector< RangeScalar,Node> &X) const
00915   {
00916     std::string tfecfFuncName("solve(trans,Y,X)");
00917     typedef DefaultSparseSolveOp<         Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar>   Op;
00918     typedef DefaultSparseTransposeSolveOp<Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar>  TOp;
00919     ArrayRCP<const OffsetType> ptrs;
00920     getOffsets(ptrs);
00921     ReadyBufferHelper<Node> rbh(node_);
00922     if (numRows_ == 0) {
00923       // null op
00924     }
00925     else if (isEmpty_) {
00926       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(unit_diag_ != Teuchos::UNIT_DIAG, std::runtime_error,
00927           " solve of empty matrix only valid for an implicit unit diagonal.");
00928       // solve I * X = Y for X = Y
00929       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Assign(X,Y);
00930     }
00931     else {
00932       if (trans == Teuchos::NO_TRANS) {
00933         Op wdp;
00934         rbh.begin();
00935         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00936         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00937         wdp.offs    = rbh.template addConstBuffer< OffsetType>(ptrs);
00938         wdp.inds    = rbh.template addConstBuffer<    Ordinal>(inds_);
00939         wdp.vals    = rbh.template addConstBuffer<     Scalar>(vals_);
00940         rbh.end();
00941         wdp.numRows = numRows_;
00942         wdp.unitDiag = (unit_diag_ == Teuchos::UNIT_DIAG ? true : false);
00943         wdp.upper    = ( tri_uplo_ == Teuchos::UPPER_TRI ? true : false);
00944         wdp.xstride = X.getStride();
00945         wdp.ystride = Y.getStride();
00946         wdp.numRHS  = X.getNumCols();
00947         // no parallel for you
00948         wdp.execute();
00949       }
00950       else {
00951         TOp wdp;
00952         rbh.begin();
00953         wdp.x       = rbh.template addNonConstBuffer<DomainScalar>(X.getValuesNonConst());
00954         wdp.y       = rbh.template addConstBuffer<RangeScalar>(Y.getValues());
00955         wdp.offs    = rbh.template addConstBuffer< OffsetType>(ptrs);
00956         wdp.inds    = rbh.template addConstBuffer<    Ordinal>(inds_);
00957         wdp.vals    = rbh.template addConstBuffer<     Scalar>(vals_);
00958         rbh.end();
00959         wdp.numRows = numRows_;
00960         wdp.unitDiag = (unit_diag_ == Teuchos::UNIT_DIAG ? true : false);
00961         wdp.upper    = ( tri_uplo_ == Teuchos::UPPER_TRI ? true : false);
00962         wdp.xstride = X.getStride();
00963         wdp.ystride = Y.getStride();
00964         wdp.numRHS  = X.getNumCols();
00965         // no parallel for you
00966         wdp.execute();
00967       }
00968     }
00969     return;
00970   }
00971 
00972 
00973   template <class Scalar, class Ordinal, class Node, class Allocator>
00974   template <class DomainScalar, class RangeScalar>
00975   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::solve(Teuchos::ETransp trans,
00976                                                         const MultiVector<DomainScalar,Node> &Y,
00977                                                               MultiVector< RangeScalar,Node> &X) const
00978   {
00979     std::string tfecfFuncName("solve(trans,Y,X)");
00980     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00981         isInitialized_ == false,
00982         std::runtime_error, " this solve was not fully initialized."
00983     );
00984     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00985         (size_t)X.getNumCols() != (size_t)Y.getNumCols(),
00986         std::runtime_error, " Left hand side and right hand side multivectors have differing numbers of vectors."
00987     );
00988     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00989         (size_t)X.getNumRows() < (size_t)numRows_,
00990         std::runtime_error, " Left-hand-side multivector does not have enough rows. "
00991                             "Likely cause is that the column map was not provided to "
00992                             "the Tpetra::CrsMatrix in the case of an implicit unit diagonal."
00993     );
00994     if (big_ptrs_ != null) {
00995       solvePrivate<DomainScalar,RangeScalar,size_t>(trans,Y,X);
00996     }
00997     else {
00998       solvePrivate<DomainScalar,RangeScalar,Ordinal>(trans,Y,X);
00999     }
01000     return;
01001   }
01002 
01003 
01004   template <class Scalar, class Ordinal, class Node, class Allocator>
01005   template <class DomainScalar, class RangeScalar, class OffsetType>
01006   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::multiplyPrivate(
01007                                 Teuchos::ETransp trans,
01008                                 RangeScalar alpha,
01009                                 const MultiVector<DomainScalar,Node> &X,
01010                                       MultiVector<RangeScalar ,Node> &Y) const
01011   {
01012     // the 1 template parameter below means that beta is not used in computations
01013     // and the output multivector enjoys overwrite semantics (i.e., will overwrite data/NaNs in Y)
01014     typedef DefaultSparseMultiplyOp<         Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar, 1>  Op;
01015     typedef DefaultSparseTransposeMultiplyOp<Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar, 1> TOp;
01016     ArrayRCP<const OffsetType> ptrs;
01017     getOffsets(ptrs);
01018     ReadyBufferHelper<Node> rbh(node_);
01019     if (isEmpty_ == true) {
01020       // Y <= 0 * X
01021       //   <= 0
01022       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Init(Y,Teuchos::ScalarTraits<RangeScalar>::zero());
01023     }
01024     else {
01025       if (trans == Teuchos::NO_TRANS) {
01026         Op wdp;
01027         rbh.begin();
01028         wdp.alpha   = alpha;
01029         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
01030         wdp.numRows = numRows_;
01031         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
01032         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
01033         wdp.offs    = rbh.template addConstBuffer<  OffsetType>(ptrs);
01034         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
01035         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
01036         wdp.xstride = X.getStride();
01037         wdp.ystride = Y.getStride();
01038         wdp.numRHS  = X.getNumCols();
01039         rbh.end();
01040         node_->template parallel_for<Op>(0,numRows_,wdp);
01041       }
01042       else {
01043         TOp wdp;
01044         rbh.begin();
01045         wdp.alpha   = alpha;
01046         wdp.beta    = Teuchos::ScalarTraits<RangeScalar>::zero(); // not used
01047         wdp.numRows = numRows_;
01048         wdp.numCols = Y.getNumRows();
01049         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
01050         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
01051         wdp.offs    = rbh.template addConstBuffer<  OffsetType>(ptrs);
01052         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
01053         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
01054         wdp.xstride = X.getStride();
01055         wdp.ystride = Y.getStride();
01056         wdp.numRHS  = X.getNumCols();
01057         rbh.end();
01058         // no parallel for you
01059         wdp.execute();
01060       }
01061     }
01062     return;
01063   }
01064 
01065   template <class Scalar, class Ordinal, class Node, class Allocator>
01066   template <class DomainScalar, class RangeScalar>
01067   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::multiply(
01068                                 Teuchos::ETransp trans,
01069                                 RangeScalar alpha,
01070                                 const MultiVector<DomainScalar,Node> &X,
01071                                       MultiVector<RangeScalar ,Node> &Y) const
01072   {
01073     std::string tfecfFuncName("multiply(trans,alpha,X,Y)");
01074     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01075         isInitialized_ == false,
01076         std::runtime_error, " sparse ops not initialized.");
01077     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01078         X.getNumCols() != Y.getNumCols(),
01079         std::runtime_error, " X and Y do not have the same number of columns.");
01080     if (big_ptrs_ != null) {
01081       multiplyPrivate<DomainScalar,RangeScalar,size_t>(trans,alpha,X,Y);
01082     }
01083     else {
01084       multiplyPrivate<DomainScalar,RangeScalar,Ordinal>(trans,alpha,X,Y);
01085     }
01086     return;
01087   }
01088 
01089 
01090   template <class Scalar, class Ordinal, class Node, class Allocator>
01091   template <class DomainScalar, class RangeScalar, class OffsetType>
01092   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::multiplyPrivate(
01093                                 Teuchos::ETransp trans,
01094                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X,
01095                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const
01096   {
01097     std::string tfecfFuncName("multiply(trans,alpha,X,beta,Y)");
01098     // the 0 template parameter below means that beta is used in computations
01099     // and the output multivector enjoys accumulation semantics (i.e., will not overwrite data/NaNs in Y)
01100     typedef DefaultSparseMultiplyOp<         Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar, 0>  Op;
01101     typedef DefaultSparseTransposeMultiplyOp<Scalar,OffsetType,Ordinal,DomainScalar,RangeScalar, 0> TOp;
01102     ArrayRCP<const OffsetType> ptrs;
01103     getOffsets(ptrs);
01104     ReadyBufferHelper<Node> rbh(node_);
01105     if (isEmpty_ == true) {
01106       // Y <= alpha * 0 * X + beta * Y
01107       //   <= beta * Y
01108       // NOTE: this neglects NaNs in X, which don't satisfy 0*NaN == 0
01109       //       however, X and Y may be of different size, and we need entries to determine how to mix those potential NaNs in X into Y
01110       //       therefore, the best we can do is scale Y to zero. Setting Y to zero would destroy NaNs in Y, which violates the semantics of the call.
01111       DefaultArithmetic<MultiVector<RangeScalar,Node> >::Scale(Y,beta);
01112     }
01113     else {
01114       if (trans == Teuchos::NO_TRANS) {
01115         Op wdp;
01116         rbh.begin();
01117         wdp.alpha   = alpha;
01118         wdp.beta    = beta;
01119         wdp.numRows = numRows_;
01120         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
01121         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
01122         wdp.offs    = rbh.template addConstBuffer<  OffsetType>(ptrs);
01123         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
01124         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
01125         wdp.xstride = X.getStride();
01126         wdp.ystride = Y.getStride();
01127         wdp.numRHS  = X.getNumCols();
01128         rbh.end();
01129         node_->template parallel_for<Op>(0,numRows_,wdp);
01130       }
01131       else {
01132         TOp wdp;
01133         rbh.begin();
01134         wdp.alpha   = alpha;
01135         wdp.beta    = beta;
01136         wdp.numRows = numRows_;
01137         wdp.numCols = Y.getNumRows();
01138         wdp.y       = rbh.template addNonConstBuffer<RangeScalar>(Y.getValuesNonConst());
01139         wdp.x       = rbh.template addConstBuffer<DomainScalar>(X.getValues());
01140         wdp.offs    = rbh.template addConstBuffer<  OffsetType>(ptrs);
01141         wdp.inds    = rbh.template addConstBuffer<     Ordinal>(inds_);
01142         wdp.vals    = rbh.template addConstBuffer<      Scalar>(vals_);
01143         wdp.xstride = X.getStride();
01144         wdp.ystride = Y.getStride();
01145         wdp.numRHS  = X.getNumCols();
01146         rbh.end();
01147         // no parallel for you
01148         wdp.execute();
01149       }
01150     }
01151     return;
01152   }
01153 
01154   template <class Scalar, class Ordinal, class Node, class Allocator>
01155   template <class DomainScalar, class RangeScalar>
01156   void DefaultHostSparseOps<Scalar,Ordinal,Node,Allocator>::multiply(
01157                                 Teuchos::ETransp trans,
01158                                 RangeScalar alpha, const MultiVector<DomainScalar,Node> &X,
01159                                 RangeScalar beta, MultiVector<RangeScalar,Node> &Y) const
01160   {
01161     std::string tfecfFuncName("multiply(trans,alpha,X,beta,Y)");
01162     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01163         isInitialized_ == false,
01164         std::runtime_error, " sparse ops not initialized.");
01165     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01166         X.getNumCols() != Y.getNumCols(),
01167         std::runtime_error, " X and Y do not have the same number of columns.");
01168     if (big_ptrs_ != null) {
01169       multiplyPrivate<DomainScalar,RangeScalar,size_t>(trans,alpha,X,beta,Y);
01170     }
01171     else {
01172       multiplyPrivate<DomainScalar,RangeScalar,Ordinal>(trans,alpha,X,beta,Y);
01173     }
01174   }
01175 
01207   template <class Ordinal, class Node, class Allocator>
01208   class DefaultHostSparseOps<void, Ordinal, Node, Allocator> : public Teuchos::Describable {
01209   public:
01211 
01212 
01214     typedef void scalar_type;
01216     typedef Ordinal ordinal_type;
01218     typedef Node node_type;
01220     typedef DefaultHostSparseOps<void, Ordinal, Node, Allocator> sparse_ops_type;
01221 
01223     template <class O, class N>
01224     struct graph {
01225       typedef DefaultCrsGraph<O,N> graph_type;
01226     };
01227 
01229     template <class S, class O, class N>
01230     struct matrix {
01231       typedef DefaultCrsMatrix<S,O,N> matrix_type;
01232     };
01233 
01247     template <class S2>
01248     struct bind_scalar {
01249       typedef DefaultHostSparseOps<S2,Ordinal,Node,Allocator> other_type;
01250     };
01251 
01262     template <class O2>
01263     struct bind_ordinal {
01264       typedef DefaultHostSparseOps<void, O2, Node, Allocator> other_type;
01265     };
01266 
01268 
01269 
01270 
01272     DefaultHostSparseOps (const RCP<Node> &node) {
01273       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Someone attempted to "
01274         "instantiate Kokkos::DefaultHostSparseOps with Scalar=void.  "
01275         "This is not allowed.  "
01276         "The Scalar=void specialization exists only for its typedefs.  "
01277         "Please report this bug to the Kokkos developers.");
01278     }
01279 
01281     DefaultHostSparseOps (const RCP<Node> &node, Teuchos::ParameterList& params) {
01282       TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Someone attempted to "
01283         "instantiate Kokkos::DefaultHostSparseOps with Scalar=void.  "
01284         "This is not allowed.  "
01285         "The Scalar=void specialization exists only for its typedefs.  "
01286         "Please report this bug to the Kokkos developers.");
01287     }
01288 
01290     ~DefaultHostSparseOps() {
01291       // We don't throw an exception here, because throwing exceptions
01292       // in a destructor may cause the application to terminate [1].
01293       // However, it's impossible that execution will reach this
01294       // point, since all the constructors throw exceptions.
01295       //
01296       // [1] http://www.parashift.com/c++-faq/dtors-shouldnt-throw.html
01297     }
01298 
01300 
01301 
01302 
01304     std::string description () const {
01305       using Teuchos::TypeNameTraits;
01306       std::ostringstream os;
01307       os <<  "Kokkos::DefaultHostSparseOps<"
01308          << "Scalar=void"
01309          << ", Ordinal=" << TypeNameTraits<Ordinal>::name()
01310          << ", Node=" << TypeNameTraits<Node>::name()
01311          << ">";
01312       return os.str();
01313     }
01314 
01316     void
01317     describe (Teuchos::FancyOStream& out,
01318               const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
01319     {
01320       using Teuchos::includesVerbLevel;
01321       using Teuchos::VERB_DEFAULT;
01322       using Teuchos::VERB_NONE;
01323       using Teuchos::VERB_LOW;
01324       using Teuchos::VERB_MEDIUM;
01325       using Teuchos::VERB_HIGH;
01326       using Teuchos::VERB_EXTREME;
01327 
01328       // Interpret the default verbosity level as VERB_MEDIUM.
01329       const Teuchos::EVerbosityLevel vl =
01330         (verbLevel == VERB_DEFAULT) ? VERB_MEDIUM : verbLevel;
01331 
01332       if (vl == VERB_NONE) {
01333         return;
01334       }
01335       else if (includesVerbLevel (vl, VERB_LOW)) { // vl >= VERB_LOW
01336         out << this->description() << std::endl;
01337       }
01338     }
01339 
01341 
01342 
01343 
01345     RCP<Node> getNode() const {
01346       // You're not supposed to instantiate this object, so we always
01347       // return null here.
01348       return Teuchos::null;
01349     }
01350 
01352 
01353 
01354 
01362     static ArrayRCP<size_t> allocRowPtrs(const RCP<Node> &node, const ArrayView<const size_t> &numEntriesPerRow)
01363     {
01364       return Allocator::allocRowPtrs(node,numEntriesPerRow);
01365     }
01366 
01374     template <class T>
01375     static ArrayRCP<T> allocStorage(const RCP<Node> &node, const ArrayView<const size_t> &rowPtrs)
01376     {
01377       return Allocator::template allocStorage<T,size_t>(node,rowPtrs);
01378     }
01379 
01387     static void finalizeGraph(Teuchos::EUplo uplo, Teuchos::EDiag diag, DefaultCrsGraph<Ordinal,Node> &graph, const RCP<ParameterList> &params) {
01388       using Teuchos::ArrayRCP;
01389       using Teuchos::as;
01390 
01391       std::string FuncName("Kokkos::DefaultHostSparseOps::finalizeGraph(graph,params)");
01392 
01393       graph.setMatDesc(uplo,diag);
01394       TEUCHOS_TEST_FOR_EXCEPTION(graph.isInitialized() == false,
01395         std::runtime_error, FuncName << ": graph has not yet been initialized.");
01396 
01397       // determine how many non-zeros, so that we can decide whether to reduce the offset pointer type
01398       ArrayRCP<const size_t> bigptrs = graph.getPointers();
01399       const size_t numrows = bigptrs.size() - 1,
01400         numnz = bigptrs[numrows];
01401       if (numnz < as<size_t> (Teuchos::OrdinalTraits<Ordinal>::max())) {
01402         ArrayRCP<Ordinal> smallptrs = arcp<Ordinal>(numrows+1);
01403         std::copy( bigptrs.begin(), bigptrs.end(), smallptrs.begin() );
01404         graph.setSmallPointers(smallptrs);
01405       }
01406     }
01407 
01409 
01410   private:
01412     DefaultHostSparseOps(const DefaultHostSparseOps& source);
01413   };
01414 
01419 } // namespace Kokkos
01420 
01421 #endif /* KOKKOS_DEFAULTSPARSEOPS_HPP */
01422 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends