Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CrsGraph.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_CRSGRAPH_HPP
00030 #define KOKKOS_CRSGRAPH_HPP
00031 
00032 #include <Teuchos_RCP.hpp>
00033 #include <Teuchos_TypeNameTraits.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_ArrayRCP.hpp>
00036 #include <Teuchos_CompileTimeAssert.hpp>
00037 
00038 #include "Kokkos_ConfigDefs.hpp"
00039 #include "Kokkos_DefaultNode.hpp"
00040 
00041 namespace Kokkos {
00042 
00043   //=========================================================================================================================
00044   // 
00045   // A host-resident CrsGraph
00046   // 
00047   //=========================================================================================================================
00048 
00052   template <class Ordinal, 
00053             class Node,
00054             class LocalMatOps>
00055   class CrsGraphHostCompute {
00056   public:
00057 
00058     typedef Ordinal               OrdinalType;
00059     typedef Node                  NodeType;
00060     typedef LocalMatOps           LocalMatOpsType;
00061 
00063 
00064 
00066     CrsGraphHostCompute(size_t numRows, const RCP<Node> &node);
00067 
00069     virtual ~CrsGraphHostCompute();
00070 
00072 
00074 
00075     
00077     RCP<Node> getNode() const;
00078 
00080 
00082 
00083 
00085     size_t getNumRows() const;
00086 
00088     size_t getNumEntries() const;
00089 
00091     bool isEmpty() const;
00092 
00094     bool isFinalized() const;
00095 
00098     bool is1DStructure() const;
00099 
00102     bool is2DStructure() const;
00103 
00105     bool isOptimized() const;
00106 
00108 
00111     void set1DStructure(ArrayRCP<Ordinal> inds, 
00112                         ArrayRCP<size_t>  rowBegs,
00113                         ArrayRCP<size_t>  rowEnds);
00114                         
00116 
00119     void set2DStructure(ArrayRCP<ArrayRCP<Ordinal> > inds,
00120                         ArrayRCP<size_t>                      numEntriesPerRow);
00121 
00123 
00132     void get1DStructure(ArrayRCP<Ordinal> &inds, 
00133                         ArrayRCP<size_t>  &rowBegs,
00134                         ArrayRCP<size_t>  &rowEnds);
00135 
00137 
00141     void get2DStructure(ArrayRCP<ArrayRCP<Ordinal> > &inds,
00142                         ArrayRCP<size_t>                      &numEntriesPerRow);
00143 
00145 
00149     void finalize(bool OptimizeStorage);
00150 
00157     template <class Scalar>
00158     void finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D);
00159 
00161     virtual void clear();
00162 
00164 
00165   protected:
00167     CrsGraphHostCompute(const CrsGraphHostCompute& sources);
00168 
00169     RCP<Node> node_;
00170     size_t numRows_, numEntries_;
00171     bool isFinalized_, isEmpty_, is1D_, is2D_, isOpt_;
00172 
00173     // 2D storage
00174     ArrayRCP<ArrayRCP<Ordinal> >  indices2D_;
00175     ArrayRCP<size_t>              numEntriesPerRow_;
00176     // 1D storage
00177     ArrayRCP<Ordinal>             indices1D_;
00178     ArrayRCP<size_t>              rowBegs_, rowEnds_;
00179   };
00180 
00181 
00182   //==============================================================================
00183   template <class Ordinal, class Node, class LocalMatOps>
00184   CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::CrsGraphHostCompute(size_t numRows, const RCP<Node> &node) 
00185   : node_(node)
00186   , numRows_(numRows)
00187   {
00188     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear();
00189   }
00190 
00191   //==============================================================================
00192   template <class Ordinal, class Node, class LocalMatOps>
00193   CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::~CrsGraphHostCompute() {
00194   }
00195 
00196   // ======= clear ===========
00197   template <class Ordinal, class Node, class LocalMatOps>
00198   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear() {
00199     isFinalized_   = false;
00200     isEmpty_       = false;
00201     is1D_          = false;
00202     is2D_          = false;
00203     isOpt_         = false;
00204     numEntries_    = 0;
00205     indices2D_        = null;
00206     numEntriesPerRow_ = null;
00207     rowBegs_          = null;
00208     rowEnds_          = null;
00209     indices1D_        = null;
00210   }
00211 
00212   // ======= node ===========
00213   template <class Ordinal, class Node, class LocalMatOps>
00214   RCP<Node> CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNode() const {
00215     return node_;
00216   }
00217 
00218   // ======= numrows ===========
00219   template <class Ordinal, class Node, class LocalMatOps>
00220   size_t CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNumRows() const {
00221     return numRows_;
00222   }
00223 
00224   // ======= numentries ===========
00225   template <class Ordinal, class Node, class LocalMatOps>
00226   size_t CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNumEntries() const {
00227     return numEntries_;
00228   }
00229 
00230   // ======= isempty ===========
00231   template <class Ordinal, class Node, class LocalMatOps>
00232   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isEmpty() const {
00233     return isEmpty_;
00234   }
00235 
00236   // ======= isfinalized ===========
00237   template <class Ordinal, class Node, class LocalMatOps>
00238   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isFinalized() const {
00239     return isFinalized_;
00240   }
00241 
00242   // ======= is1d ===========
00243   template <class Ordinal, class Node, class LocalMatOps>
00244   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::is1DStructure() const {
00245     return is1D_;
00246   }
00247 
00248   // ======= is2d ===========
00249   template <class Ordinal, class Node, class LocalMatOps>
00250   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::is2DStructure() const {
00251     return is2D_;
00252   }
00253 
00254   // ======= isopt ===========
00255   template <class Ordinal, class Node, class LocalMatOps>
00256   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isOptimized() const {
00257     return isOpt_;
00258   }
00259 
00260   // ======= get 1d ===========
00261   template <class Ordinal, class Node, class LocalMatOps>
00262   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::get1DStructure(ArrayRCP<Ordinal> &inds, 
00263                                                                      ArrayRCP<size_t>  &rowBegs,
00264                                                                      ArrayRCP<size_t>  &rowEnds)
00265   {
00266     inds = indices1D_;
00267     rowBegs = rowBegs_;
00268     rowEnds = rowEnds_;
00269   }
00270 
00271   // ======= get 2d ===========
00272   template <class Ordinal, class Node, class LocalMatOps>
00273   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::get2DStructure(ArrayRCP<ArrayRCP<Ordinal> > &inds,
00274                                                                      ArrayRCP<size_t>                      &numEntriesPerRow) 
00275   {
00276     inds = indices2D_;
00277     numEntriesPerRow = numEntriesPerRow_;
00278   }
00279 
00280   // ======= set 1d ===========
00281   template <class Ordinal, class Node, class LocalMatOps>
00282   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::set1DStructure(ArrayRCP<Ordinal> inds, 
00283                                                                      ArrayRCP<size_t>  rowBegs,
00284                                                                      ArrayRCP<size_t>  rowEnds)
00285   {
00286     TEST_FOR_EXCEPTION( (size_t)rowBegs.size() != numRows_+1 || (size_t)rowEnds.size() != numRows_, std::runtime_error, 
00287         Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): rowBegs and rowEnds are not correctly sized.");
00288     TEST_FOR_EXCEPTION( (size_t)rowBegs[numRows_] > (size_t)inds.size(), std::runtime_error,
00289         Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): rowBegs contents to not agree with inds size.");
00290     this->clear();
00291     //
00292     indices1D_ = inds;
00293     rowBegs_ = rowBegs;
00294     rowEnds_ = rowEnds;
00295     if (numRows_ > 0) {
00296       for (size_t i=0; i < this->getNumRows(); ++i) {
00297         numEntries_ += (this->rowEnds_[i] - this->rowBegs_[i]);
00298 #ifdef HAVE_KOKKOS_DEBUG
00299         // row i goes like [ begs[i] , ends[i] )
00300         // sanity        : begs[i] <= ends[i]
00301         // ordering      : begs[i] <= begs[i+1]
00302         // no overlapping: ends[i] <= begs[i+1]
00303         TEST_FOR_EXCEPTION( rowBegs_[i+1] < rowBegs_[i] || rowEnds_[i] < rowBegs_[i] || rowEnds_[i] > rowBegs_[i+1], std::runtime_error,
00304             Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): ends and begs are not consistent.");
00305 #endif
00306       }
00307     }
00308     is1D_ = true;
00309   }
00310 
00311   // ======= set 2d ===========
00312   template <class Ordinal, class Node, class LocalMatOps>
00313   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::set2DStructure(ArrayRCP<ArrayRCP<Ordinal> > inds,
00314                                                                      ArrayRCP<size_t>                      numEntriesPerRow)
00315   {
00316     TEST_FOR_EXCEPTION( (size_t)inds.size() != numRows_ || (size_t)numEntriesPerRow.size() != numRows_, std::runtime_error,
00317         Teuchos::typeName(*this) << "::set2DStructure(inds,numEntriesPerRow): numEntriesPerRow and inds must have as many entries as the number of rows specified to the constructor.");
00318     this->clear();
00319     //
00320     indices2D_  = inds;
00321     if (indices2D_ != null) {
00322       numEntriesPerRow_ = numEntriesPerRow;
00323       numEntries_ = std::accumulate(this->numEntriesPerRow_.begin(), this->numEntriesPerRow_.end(), 0);
00324 #ifdef HAVE_KOKKOS_DEBUG
00325       for (size_t i=0; i<numRows_; ++i) {
00326         TEST_FOR_EXCEPTION( (size_t)inds[i].size() < numEntriesPerRow[i], std::runtime_error,
00327             Teuchos::typeName(*this) << "::set2DStructure(): inds[" << i << "] == " << inds[i] 
00328             << " is not large enough for the specified number of entries, "
00329             << " numEntriesPerRow[" << i << "] == " << numEntriesPerRow[i]);
00330       }
00331 #endif
00332     }
00333     is2D_ = true;
00334   }
00335 
00336   // ======= finalize ===========
00337   template <class Ordinal, class Node, class LocalMatOps>
00338   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage)
00339   {
00340     // allocations not done using the Node. no current need for host-based nodes, and 
00341     // this leads to incorrect behavior when we try to reuse this code from child CrsGraphDeviceCompute
00342     if (isFinalized() && !(OptimizeStorage == true && isOptimized() == false)) return;
00343     if ((indices1D_ == null && indices2D_ == null) || (this->getNumEntries() == 0)) {
00344       isEmpty_ = true;
00345     }
00346     else {
00347       isEmpty_ = false;
00348       if (OptimizeStorage) {
00349         // move into packed 1D storage
00350         if (is1DStructure() == false) {
00351           // allocate 1D storage
00352           // we these are for host use, so we'll forgo the view
00353           indices1D_ = arcp<Ordinal>(this->getNumEntries());
00354         }
00355         ArrayRCP<size_t> offsets = arcp<size_t>(numRows_+1);
00356         // copy/pack data
00357         size_t curoffset = 0;
00358         size_t curnuminds;
00359         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00360         newinds = indices1D_.begin();
00361         for (size_t i=0; i < numRows_; ++i) {
00362           offsets[i] = curoffset;
00363           if (is1DStructure()) {
00364             curnuminds = rowEnds_[i] - rowBegs_[i];
00365             oldinds = indices1D_.begin() + rowBegs_[i];
00366           }
00367           else {
00368             curnuminds = numEntriesPerRow_[i];
00369             oldinds = indices2D_[i].begin();
00370           }
00371           std::copy(oldinds, oldinds+curnuminds, newinds);
00372           newinds += curnuminds;
00373           curoffset += curnuminds;
00374         }
00375         offsets[numRows_] = curoffset;
00376         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00377             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00378         // done with the original row beg/end offsets, can point to the new overlapping one
00379         rowBegs_   = offsets;
00380         rowEnds_   = offsets.persistingView(1,numRows_);
00381         isOpt_     = true;
00382         is1D_      = true;
00383         // delete 2D storage (if there was any)
00384         is2D_      = false;
00385         numEntriesPerRow_ = null;
00386         indices2D_        = null;
00387       }
00388     }
00389     isFinalized_ = true;
00390   }
00391 
00392 
00393   // ======= finalize ===========
00394   // finalize() storage for the graph with associated matrix values
00395   // this is called from a CrsMatrix, and we're doing the finalize the for the graph and matrix at the same time, so the matrix doesn't have to.
00396   template <class Ordinal, class Node, class LocalMatOps>
00397   template <class Scalar>
00398   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D)
00399   {
00400     if (isFinalized() && !(OptimizeStorage == true && isOptimized() == false)) return;
00401     if ((indices1D_ == null && indices2D_ == null) || (this->getNumEntries() == 0)) {
00402       isEmpty_ = true;
00403     }
00404     else {
00405       isEmpty_ = false;
00406       // move into packed 1D storage
00407       if (OptimizeStorage) {
00408         if (is1DStructure() == false) {
00409           // allocate 1D storage
00410           // we know this is a host-base node, so we'll forgo the view of rowBegs_,rowEnds_
00411           indices1D_ = arcp<Ordinal>(this->getNumEntries());
00412           values1D   = arcp<Scalar >(this->getNumEntries());
00413         }
00414         ArrayRCP<size_t> offsets = arcp<size_t>(numRows_+1);
00415         // copy/pack data
00416         size_t curoffset = 0;
00417         size_t curnuminds;
00418         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00419         typename ArrayRCP<Scalar >::iterator oldvals, newvals;
00420         newinds = indices1D_.begin();
00421         newvals = values1D.begin();
00422         for (size_t i=0; i < numRows_; ++i) {
00423           offsets[i] = curoffset;
00424           if (is1DStructure()) {
00425             curnuminds = rowEnds_[i] - rowBegs_[i];
00426             oldinds = indices1D_.begin() + rowBegs_[i];
00427             oldvals = values1D.begin() + rowBegs_[i];
00428           }
00429           else {
00430             curnuminds = numEntriesPerRow_[i];
00431             oldinds = indices2D_[i].begin();
00432             oldvals = values2D[i].begin();
00433           }
00434           std::copy(oldinds, oldinds+curnuminds, newinds);
00435           std::copy(oldvals, oldvals+curnuminds, newvals);
00436           newinds += curnuminds;
00437           newvals += curnuminds;
00438           curoffset += curnuminds;
00439         }
00440         offsets[numRows_] = curoffset;
00441         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00442             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00443         // done with the original row beg/end offsets, can point to the new overlapping one
00444         rowBegs_   = offsets;
00445         rowEnds_   = offsets.persistingView(1,numRows_);
00446         is1D_      = true;
00447         isOpt_     = true;
00448         // delete 2D storage (if there was any)
00449         is2D_      = false;
00450         numEntriesPerRow_ = null;
00451         indices2D_        = null;
00452         values2D          = null;
00453       }
00454     }
00455     isFinalized_ = true;
00456   }
00457 
00458 
00459   //=========================================================================================================================
00460   // 
00461   // A device-resident CrsGraph
00462   // 
00463   //=========================================================================================================================
00464 
00465 
00473   template <class Ordinal, 
00474             class Node,
00475             class LocalMatOps>
00476   class CrsGraphDeviceCompute : public CrsGraphHostCompute<Ordinal,Node,LocalMatOps> {
00477   public:
00478 
00480 
00481 
00483     CrsGraphDeviceCompute(size_t numRows, const RCP<Node> &node);
00484 
00486     ~CrsGraphDeviceCompute();
00487 
00489 
00491 
00492 
00494 
00498     void finalize(bool OptimizeStorage);
00499 
00501 
00508     template <class Scalar>
00509     void finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D, ArrayRCP<Scalar> &d_values1D);
00510 
00512     void getDeviceBuffers(ArrayRCP<Ordinal> &d_inds, ArrayRCP<size_t> &d_offs) const;
00513 
00515     virtual void clear();
00516 
00518 
00519   protected:
00521     CrsGraphDeviceCompute(const CrsGraphDeviceCompute& sources);
00522 
00523     // device storage (always 1D packed)
00524     ArrayRCP<Ordinal> pbuf_indices_;
00525     ArrayRCP<size_t > pbuf_offsets_;
00526   };
00527 
00528   //==============================================================================
00529   template <class Ordinal, class Node, class LocalMatOps>
00530   CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::CrsGraphDeviceCompute(size_t numRows, const RCP<Node> &node) 
00531   : CrsGraphHostCompute<Ordinal,Node,LocalMatOps>(numRows,node) 
00532   {}
00533 
00534   //===== destructor =====
00535   template <class Ordinal, class Node, class LocalMatOps>
00536   CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::~CrsGraphDeviceCompute() 
00537   {}
00538 
00539   //===== clear =====
00540   template <class Ordinal, class Node, class LocalMatOps>
00541   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::clear() { 
00542     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear();
00543     pbuf_indices_ = null;
00544     pbuf_offsets_ = null;
00545   }
00546 
00547   //==============================================================================
00548   template <class Ordinal, class Node, class LocalMatOps>
00549   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage)
00550   {
00551     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00552     // call "normal" finalize(). this handles the re-structuring of data. below, we will handle the movement to device.
00553     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(OptimizeStorage);
00554     // all we're doing here now is copying data to device.
00555     // copy into a 1D structure on the device, regardless of host format
00556     if (this->isEmpty()) {
00557       pbuf_indices_ = null;
00558       pbuf_offsets_ = null;
00559     }
00560     else {
00561       // allocate space on the device and copy data there, in a packed format
00562       pbuf_offsets_ = this->getNode()->template allocBuffer<size_t>(this->getNumRows()+1);
00563       pbuf_indices_ = this->getNode()->template allocBuffer<Ordinal>(this->getNumEntries());
00564       if (this->isOptimized()) {
00565         // should be packed now; single copy should do, and offsets are rowBegs_
00566         this->getNode()->template copyToBuffer<size_t >(this->getNumRows()+1, this->rowBegs_(),   pbuf_offsets_);
00567         this->getNode()->template copyToBuffer<Ordinal>(this->getNumEntries(),this->indices1D_(), pbuf_indices_);
00568       }
00569       else {
00570         ArrayRCP<size_t > view_offsets = this->getNode()->template viewBufferNonConst<size_t >(WriteOnly,pbuf_offsets_.size(),pbuf_offsets_);
00571         ArrayRCP<Ordinal> view_indices = this->getNode()->template viewBufferNonConst<Ordinal>(WriteOnly,pbuf_indices_.size(),pbuf_indices_);
00572         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00573         newinds = view_indices.begin();
00574         size_t curnuminds, curoffset = 0;
00575         for (size_t i=0; i < this->getNumRows(); ++i) {
00576           view_offsets[i] = curoffset;
00577           if (this->is1DStructure()) {
00578             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00579             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00580           }
00581           else {
00582             curnuminds = this->numEntriesPerRow_[i];
00583             oldinds = this->indices2D_[i].begin();
00584           }
00585           std::copy(oldinds, oldinds+curnuminds, newinds);
00586           newinds += curnuminds;
00587           curoffset += curnuminds;
00588         }
00589         view_offsets[this->getNumRows()] = curoffset;
00590         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00591             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00592         view_offsets = null;
00593         view_indices = null;
00594       }
00595     }
00596   }
00597 
00598   // ======= get device ===========
00599   template <class Ordinal, class Node, class LocalMatOps>
00600   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::getDeviceBuffers(ArrayRCP<Ordinal> &d_inds, ArrayRCP<size_t> &d_offs) const
00601   {
00602     d_inds = pbuf_indices_;
00603     d_offs = pbuf_offsets_;
00604   }
00605 
00606 
00607   //==============================================================================
00608   template <class Ordinal, class Node, class LocalMatOps>
00609   template <class Scalar>
00610   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &h_vals2D, ArrayRCP<Scalar> &h_vals1D, ArrayRCP<Scalar> &d_valsPacked) 
00611   {
00612     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00613     // call "normal" finalize(). this handles the re-structuring of data. below, we will handle the movement to device.
00614     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(OptimizeStorage,h_vals2D,h_vals1D);
00615     if (this->isEmpty()) {
00616       pbuf_indices_ = null;
00617       pbuf_offsets_ = null;
00618       d_valsPacked  = null;
00619     }
00620     else {
00621       // allocate space on the device and copy data there, in a packed format
00622       pbuf_offsets_ = this->getNode()->template allocBuffer<size_t>(this->getNumRows()+1);
00623       pbuf_indices_ = this->getNode()->template allocBuffer<Ordinal>(this->getNumEntries());
00624       d_valsPacked  = this->getNode()->template allocBuffer<Scalar >(this->getNumEntries());
00625       if (this->isOptimized()) {
00626         // should be packed now; single copy should do, and offsets are rowBegs_
00627         this->getNode()->template copyToBuffer<size_t >(this->getNumRows()+1 ,  this->rowBegs_(), pbuf_offsets_);
00628         this->getNode()->template copyToBuffer<Ordinal>(this->getNumEntries(),this->indices1D_(), pbuf_indices_);
00629         this->getNode()->template copyToBuffer<Scalar >(this->getNumEntries(),        h_vals1D(), d_valsPacked);
00630       }
00631       else {
00632         ArrayRCP<size_t > view_offsets = this->getNode()->template viewBufferNonConst<size_t >(WriteOnly,pbuf_offsets_.size(),pbuf_offsets_);
00633         ArrayRCP<Ordinal> view_indices = this->getNode()->template viewBufferNonConst<Ordinal>(WriteOnly,pbuf_indices_.size(),pbuf_indices_);
00634         ArrayRCP<Scalar >  view_values = this->getNode()->template viewBufferNonConst<Scalar >(WriteOnly, d_valsPacked.size(),d_valsPacked);
00635         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00636         typename ArrayRCP<Scalar >::iterator oldvals, newvals;
00637         newinds = view_indices.begin();
00638         newvals = view_values.begin();
00639         size_t curnuminds, curoffset = 0;
00640         for (size_t i=0; i < this->getNumRows(); ++i) {
00641           view_offsets[i] = curoffset;
00642           if (this->is1DStructure()) {
00643             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00644             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00645             oldvals = h_vals1D.begin() + this->rowBegs_[i];
00646           }
00647           else {
00648             curnuminds = this->numEntriesPerRow_[i];
00649             oldinds = this->indices2D_[i].begin();
00650             oldvals = h_vals2D[i].begin();
00651           }
00652           std::copy(oldinds, oldinds+curnuminds, newinds);
00653           std::copy(oldvals, oldvals+curnuminds, newvals);
00654           newinds += curnuminds;
00655           newvals += curnuminds;
00656           curoffset += curnuminds;
00657         }
00658         view_offsets[this->getNumRows()] = curoffset;
00659         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00660             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00661         view_offsets = null;
00662         view_indices = null;
00663         view_values  = null;
00664       }
00665     }
00666   }
00667 
00668 
00669 
00670   //=========================================================================================================================
00671   // 
00672   // A first-touch allocation host-resident CrsGraph
00673   // 
00674   //=========================================================================================================================
00675 
00679   template <class Ordinal, 
00680             class Node,
00681             class LocalMatOps>
00682   class FirstTouchHostCrsGraph : public CrsGraphHostCompute<Ordinal,Node,LocalMatOps> {
00683   public:
00684 
00685     typedef typename CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::OrdinalType      OrdinalType;
00686     typedef typename CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::NodeType         NodeType;
00687     typedef typename CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::LocalMatOpsType  LocalMatOpsType;
00688 
00690 
00691 
00693     FirstTouchHostCrsGraph(size_t numRows, const RCP<Node> &node);
00694 
00696     virtual ~FirstTouchHostCrsGraph();
00697 
00699 
00701 
00702 
00704 
00708     void finalize(bool OptimizeStorage);
00709 
00716     template <class Scalar>
00717     void finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D);
00718 
00720 
00721   protected:
00723     FirstTouchHostCrsGraph(const FirstTouchHostCrsGraph& sources);
00724   };
00725 
00726 #ifndef KERNEL_PREFIX
00727 #define KERNEL_PREFIX
00728 #endif
00729 
00730 
00731 
00732 
00733 
00734 
00735 
00736 
00737   template <class T>
00738   struct FirstTouchCopyIndicesKernel {
00739     const size_t * numEntriesPerRow;
00740     const size_t * offsets1D;
00741     T * data1D;
00742     const ArrayRCP<T> * data2D;
00743 
00744     inline KERNEL_PREFIX void execute(size_t row) {
00745       const size_t rowNumInds = numEntriesPerRow[row];
00746       const T* const oldinds = data2D[row].getRawPtr();
00747       T* const newinds = data1D + offsets1D[row];
00748       std::copy(oldinds, oldinds+rowNumInds, newinds);
00749     }
00750   };
00751 
00752   //==============================================================================
00753   template <class Ordinal, class Node, class LocalMatOps>
00754   FirstTouchHostCrsGraph<Ordinal,Node,LocalMatOps>::FirstTouchHostCrsGraph(size_t numRows, const RCP<Node> &node) 
00755   : CrsGraphHostCompute<Ordinal,Node,LocalMatOps>(numRows,node) 
00756   {
00757     Teuchos::CompileTimeAssert<Node::isHostNode == false> cta; (void)cta;
00758   }
00759 
00760   //==============================================================================
00761   template <class Ordinal, class Node, class LocalMatOps>
00762   FirstTouchHostCrsGraph<Ordinal,Node,LocalMatOps>::~FirstTouchHostCrsGraph() {}
00763 
00764   // ======= finalize ===========
00765   template <class Ordinal, class Node, class LocalMatOps>
00766   void FirstTouchHostCrsGraph<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage)
00767   {
00768     // allocations not done using the Node. no current need for host-based nodes, and 
00769     // this leads to incorrect behavior when we try to reuse this code from child CrsGraphDeviceCompute
00770     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00771     if ((this->indices1D_ == null && this->indices2D_ == null) || (this->getNumEntries() == 0)) {
00772       this->isEmpty_ = true;
00773     }
00774     else {
00775       this->isEmpty_ = false;
00776       if (OptimizeStorage) {
00777         // move into packed 1D storage
00778         ArrayRCP<size_t> offsets = arcp<size_t>(this->numRows_+1);
00779         if (this->is2DStructure() == true) {
00780           // 2D to 1D packed: first-touch allocation
00781           // allocate 1D storage
00782           this->indices1D_ = arcp<Ordinal>(this->getNumEntries());
00783           // compute offset array on host thread
00784           {
00785             size_t curoffset = 0;
00786             for (size_t i=0; i < this->numRows_; ++i) {
00787               offsets[i] = curoffset;
00788               curoffset += this->numEntriesPerRow_[i];
00789             }
00790             offsets[this->numRows_] = curoffset;
00791             TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00792                 Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00793           }
00794           FirstTouchCopyIndicesKernel<Ordinal> kern;
00795           kern.offsets1D = offsets.getRawPtr();
00796           kern.numEntriesPerRow = this->numEntriesPerRow_.getRawPtr();
00797           kern.data1D = this->indices1D_.getRawPtr();
00798           kern.data2D = this->indices2D_.getRawPtr();
00799           this->getNode()->template parallel_for<FirstTouchCopyIndicesKernel<Ordinal> >(0,this->numRows_,kern);
00800         }
00801         else {
00802           // 1D to 1D packed: no first-touch
00803           // copy/pack data
00804           size_t curoffset = 0;
00805           size_t curnuminds;
00806           typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00807           newinds = this->indices1D_.begin();
00808           for (size_t i=0; i < this->numRows_; ++i) {
00809             offsets[i] = curoffset;
00810             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00811             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00812             std::copy(oldinds, oldinds+curnuminds, newinds);
00813             newinds += curnuminds;
00814             curoffset += curnuminds;
00815           }
00816           offsets[this->numRows_] = curoffset;
00817           TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00818               Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00819         }
00820         // done with the original row beg/end offsets, can point to the new overlapping one
00821         this->rowBegs_   = offsets;
00822         this->rowEnds_   = offsets.persistingView(1,this->numRows_);
00823         this->isOpt_     = true;
00824         this->is1D_      = true;
00825         // delete 2D storage (if any)
00826         this->is2D_      = false;
00827         this->numEntriesPerRow_ = null;
00828         this->indices2D_        = null;
00829       }
00830     }
00831     this->isFinalized_ = true;
00832   }
00833 
00834 
00835   // ======= finalize ===========
00836   // finalize() storage for the graph with associated matrix values
00837   // this is called from a CrsMatrix, and we're doing the finalize the for the graph and matrix at the same time, so the matrix doesn't have to.
00838   template <class Ordinal, class Node, class LocalMatOps>
00839   template <class Scalar>
00840   void FirstTouchHostCrsGraph<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D)
00841   {
00842     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00843     if ((this->indices1D_ == null && this->indices2D_ == null) || (this->getNumEntries() == 0)) {
00844       this->isEmpty_ = true;
00845     }
00846     else {
00847       this->isEmpty_ = false;
00848       // move into packed 1D storage
00849       if (OptimizeStorage) {
00850         ArrayRCP<size_t> offsets = arcp<size_t>(this->numRows_+1);
00851         if (this->is2DStructure() == true) {
00852           // 2D to 1D packed: first-touch allocation
00853           // allocate 1D storage
00854           this->indices1D_ = arcp<Ordinal>(this->getNumEntries());
00855           values1D         = arcp<Scalar >(this->getNumEntries());
00856           // compute offset array on host thread
00857           {
00858             size_t curoffset = 0;
00859             for (size_t i=0; i < this->numRows_; ++i) {
00860               offsets[i] = curoffset;
00861               curoffset += this->numEntriesPerRow_[i];
00862             }
00863             offsets[this->numRows_] = curoffset;
00864             TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00865                 Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00866           }
00867           {
00868             FirstTouchCopyIndicesKernel<Ordinal> indskern;
00869             indskern.offsets1D = offsets.getRawPtr();
00870             indskern.numEntriesPerRow = this->numEntriesPerRow_.getRawPtr();
00871             indskern.data1D = this->indices1D_.getRawPtr();
00872             indskern.data2D = this->indices2D_.getRawPtr();
00873             this->getNode()->template parallel_for<FirstTouchCopyIndicesKernel<Ordinal> >(0,this->numRows_,indskern);
00874           }
00875           {
00876             FirstTouchCopyIndicesKernel<Scalar> valskern;
00877             valskern.offsets1D = offsets.getRawPtr();
00878             valskern.numEntriesPerRow = this->numEntriesPerRow_.getRawPtr();
00879             valskern.data1D = values1D.getRawPtr();
00880             valskern.data2D = values2D.getRawPtr();
00881             this->getNode()->template parallel_for<FirstTouchCopyIndicesKernel<Scalar> >(0,this->numRows_,valskern);
00882           }
00883         }
00884         else {
00885           // copy/pack data
00886           // 1D to 1D packed: no first-touch
00887           size_t curoffset = 0;
00888           size_t curnuminds;
00889           typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00890           typename ArrayRCP<Scalar >::iterator oldvals, newvals;
00891           newinds = this->indices1D_.begin();
00892           newvals = values1D.begin();
00893           for (size_t i=0; i < this->numRows_; ++i) {
00894             offsets[i] = curoffset;
00895             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00896             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00897             oldvals = values1D.begin() + this->rowBegs_[i];
00898             std::copy(oldinds, oldinds+curnuminds, newinds);
00899             std::copy(oldvals, oldvals+curnuminds, newvals);
00900             newinds += curnuminds;
00901             newvals += curnuminds;
00902             curoffset += curnuminds;
00903           }
00904           offsets[this->numRows_] = curoffset;
00905           TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00906             Teuchos::typeName(*this) << "::finalize(): "
00907             "Internal logic error: curoffset (= " 
00908             << curoffset << ") != this->getNumEntries() (= "
00909             << this->getNumEntries() 
00910             << ").  Please contact Kokkos team.");
00911         }
00912         // done with the original row beg/end offsets, can point to the new overlapping one
00913         this->rowBegs_   = offsets;
00914         this->rowEnds_   = offsets.persistingView(1,this->numRows_);
00915         this->is1D_      = true;
00916         this->isOpt_     = true;
00917         // delete 2D storage (if there was any)
00918         this->is2D_      = false;
00919         this->numEntriesPerRow_ = null;
00920         this->indices2D_        = null;
00921         values2D          = null;
00922       }
00923     }
00924     this->isFinalized_ = true;
00925   }
00926 
00927 
00928 
00929   //=========================================================================================================================
00930   // 
00931   // Specializations
00932   // 
00933   //=========================================================================================================================
00934 
00940   template <class Ordinal, 
00941             class Node,
00942             class LocalMatOps>
00943   class CrsGraph : public CrsGraphHostCompute<Ordinal,Node,LocalMatOps> {
00944   public:
00945     CrsGraph(size_t numRows, const RCP<Node> &node) : CrsGraphHostCompute<Ordinal,Node,LocalMatOps>(numRows,node) {}
00946   private:
00947     CrsGraph(const CrsGraph<Ordinal,Node,LocalMatOps> &graph); // not implemented
00948   };
00949 
00950 #ifndef HAVE_KOKKOS_NO_FIRST_TOUCH_MATVEC_ALLOCATION
00951 #ifdef HAVE_KOKKOS_TBB
00952 
00957   class TBBNode;
00958   template <class Ordinal, 
00959             class LocalMatOps>
00960   class CrsGraph<Ordinal,TBBNode,LocalMatOps> : public FirstTouchHostCrsGraph<Ordinal,TBBNode,LocalMatOps> {
00961   public:
00962     CrsGraph(size_t numRows, const RCP<TBBNode> &node) : FirstTouchHostCrsGraph<Ordinal,TBBNode,LocalMatOps>(numRows,node) {}
00963   private:
00964     CrsGraph(const CrsGraph<Ordinal,TBBNode,LocalMatOps> &graph); // not implemented
00965   };
00966 #endif
00967 #ifdef HAVE_KOKKOS_THREADPOOL
00968   class TPINode;
00974   template <class Ordinal, 
00975             class LocalMatOps>
00976   class CrsGraph<Ordinal,TPINode,LocalMatOps> : public FirstTouchHostCrsGraph<Ordinal,TPINode,LocalMatOps> {
00977   public:
00978     CrsGraph(size_t numRows, const RCP<TPINode> &node) : FirstTouchHostCrsGraph<Ordinal,TPINode,LocalMatOps>(numRows,node) {}
00979   private:
00980     CrsGraph(const CrsGraph<Ordinal,TPINode,LocalMatOps> &graph); // not implemented
00981   };
00982 #endif
00983 #endif
00984 
00990   template <class S, class O, class N> class DefaultDeviceSparseOps;
00991   template <class S,
00992             class Ordinal,
00993             class Node>
00994   class CrsGraph<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > : public CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > {
00995   public:
00996     CrsGraph(size_t numRows, const RCP<Node> &node) : CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> >(numRows,node) {}
00997   private:
00998     CrsGraph(const CrsGraph<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > &graph); // not implemented
00999   };
01000 
01001 } // namespace Kokkos
01002 
01003 #endif /* KOKKOS_CRSGRAPH_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends