Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_CrsGraph.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef KOKKOS_CRSGRAPH_HPP
00030 #define KOKKOS_CRSGRAPH_HPP
00031 
00032 #include <Teuchos_RCP.hpp>
00033 #include <Teuchos_TypeNameTraits.hpp>
00034 #include <Teuchos_TestForException.hpp>
00035 #include <Teuchos_ArrayRCP.hpp>
00036 
00037 #include "Kokkos_ConfigDefs.hpp"
00038 #include "Kokkos_DefaultNode.hpp"
00039 
00040 namespace Kokkos {
00041 
00042   //=========================================================================================================================
00043   // 
00044   // A host-resident CrsGraph
00045   // 
00046   //=========================================================================================================================
00047 
00051   template <class Ordinal, 
00052             class Node,
00053             class LocalMatOps>
00054   class CrsGraphHostCompute {
00055   public:
00056 
00057     typedef Ordinal               OrdinalType;
00058     typedef Node                  NodeType;
00059     typedef LocalMatOps           LocalMatOpsType;
00060 
00062 
00063 
00065     CrsGraphHostCompute(size_t numRows, const RCP<Node> &node);
00066 
00068     virtual ~CrsGraphHostCompute();
00069 
00071 
00073 
00074     
00076     RCP<Node> getNode() const;
00077 
00079 
00081 
00082 
00084     size_t getNumRows() const;
00085 
00087     size_t getNumEntries() const;
00088 
00090     bool isEmpty() const;
00091 
00093     bool isFinalized() const;
00094 
00097     bool is1DStructure() const;
00098 
00101     bool is2DStructure() const;
00102 
00104     bool isOptimized() const;
00105 
00107 
00110     void set1DStructure(ArrayRCP<Ordinal> inds, 
00111                         ArrayRCP<size_t>  rowBegs,
00112                         ArrayRCP<size_t>  rowEnds);
00113                         
00115 
00118     void set2DStructure(ArrayRCP<ArrayRCP<Ordinal> > inds,
00119                         ArrayRCP<size_t>                      numEntriesPerRow);
00120 
00122 
00131     void get1DStructure(ArrayRCP<Ordinal> &inds, 
00132                         ArrayRCP<size_t>  &rowBegs,
00133                         ArrayRCP<size_t>  &rowEnds);
00134 
00136 
00140     void get2DStructure(ArrayRCP<ArrayRCP<Ordinal> > &inds,
00141                         ArrayRCP<size_t>                      &numEntriesPerRow);
00142 
00144 
00148     void finalize(bool OptimizeStorage);
00149 
00156     template <class Scalar>
00157     void finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D);
00158 
00160     virtual void clear();
00161 
00163 
00164   protected:
00166     CrsGraphHostCompute(const CrsGraphHostCompute& sources);
00167 
00168     RCP<Node> node_;
00169     size_t numRows_, numEntries_;
00170     bool isFinalized_, isEmpty_, is1D_, is2D_, isOpt_;
00171 
00172     // 2D storage
00173     ArrayRCP<ArrayRCP<Ordinal> >  indices2D_;
00174     ArrayRCP<size_t>                       numEntriesPerRow_;
00175     // 1D storage
00176     ArrayRCP<Ordinal>                      indices1D_;
00177     ArrayRCP<size_t>                       rowBegs_, rowEnds_;
00178   };
00179 
00180 
00181   //==============================================================================
00182   template <class Ordinal, class Node, class LocalMatOps>
00183   CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::CrsGraphHostCompute(size_t numRows, const RCP<Node> &node) 
00184   : node_(node)
00185   , numRows_(numRows)
00186   {
00187     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear();
00188   }
00189 
00190   //==============================================================================
00191   template <class Ordinal, class Node, class LocalMatOps>
00192   CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::~CrsGraphHostCompute() {
00193   }
00194 
00195   // ======= clear ===========
00196   template <class Ordinal, class Node, class LocalMatOps>
00197   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear() {
00198     isFinalized_   = false;
00199     isEmpty_       = false;
00200     is1D_          = false;
00201     is2D_          = false;
00202     isOpt_         = false;
00203     numEntries_    = 0;
00204     indices2D_        = null;
00205     numEntriesPerRow_ = null;
00206     rowBegs_          = null;
00207     rowEnds_          = null;
00208     indices1D_        = null;
00209   }
00210 
00211   // ======= node ===========
00212   template <class Ordinal, class Node, class LocalMatOps>
00213   RCP<Node> CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNode() const {
00214     return node_;
00215   }
00216 
00217   // ======= numrows ===========
00218   template <class Ordinal, class Node, class LocalMatOps>
00219   size_t CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNumRows() const {
00220     return numRows_;
00221   }
00222 
00223   // ======= numentries ===========
00224   template <class Ordinal, class Node, class LocalMatOps>
00225   size_t CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::getNumEntries() const {
00226     return numEntries_;
00227   }
00228 
00229   // ======= isempty ===========
00230   template <class Ordinal, class Node, class LocalMatOps>
00231   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isEmpty() const {
00232     return isEmpty_;
00233   }
00234 
00235   // ======= isfinalized ===========
00236   template <class Ordinal, class Node, class LocalMatOps>
00237   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isFinalized() const {
00238     return isFinalized_;
00239   }
00240 
00241   // ======= is1d ===========
00242   template <class Ordinal, class Node, class LocalMatOps>
00243   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::is1DStructure() const {
00244     return is1D_;
00245   }
00246 
00247   // ======= is2d ===========
00248   template <class Ordinal, class Node, class LocalMatOps>
00249   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::is2DStructure() const {
00250     return is2D_;
00251   }
00252 
00253   // ======= isopt ===========
00254   template <class Ordinal, class Node, class LocalMatOps>
00255   bool CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::isOptimized() const {
00256     return isOpt_;
00257   }
00258 
00259   // ======= get 1d ===========
00260   template <class Ordinal, class Node, class LocalMatOps>
00261   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::get1DStructure(ArrayRCP<Ordinal> &inds, 
00262                                                                      ArrayRCP<size_t>  &rowBegs,
00263                                                                      ArrayRCP<size_t>  &rowEnds)
00264   {
00265     inds = indices1D_;
00266     rowBegs = rowBegs_;
00267     rowEnds = rowEnds_;
00268   }
00269 
00270   // ======= get 2d ===========
00271   template <class Ordinal, class Node, class LocalMatOps>
00272   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::get2DStructure(ArrayRCP<ArrayRCP<Ordinal> > &inds,
00273                                                                      ArrayRCP<size_t>                      &numEntriesPerRow) 
00274   {
00275     inds = indices2D_;
00276     numEntriesPerRow = numEntriesPerRow_;
00277   }
00278 
00279   // ======= set 1d ===========
00280   template <class Ordinal, class Node, class LocalMatOps>
00281   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::set1DStructure(ArrayRCP<Ordinal> inds, 
00282                                                                      ArrayRCP<size_t>  rowBegs,
00283                                                                      ArrayRCP<size_t>  rowEnds)
00284   {
00285     TEST_FOR_EXCEPTION( (size_t)rowBegs.size() != numRows_+1 || (size_t)rowEnds.size() != numRows_, std::runtime_error, 
00286         Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): rowBegs and rowEnds are not correctly sized.");
00287     TEST_FOR_EXCEPTION( (size_t)rowBegs[numRows_] > (size_t)inds.size(), std::runtime_error,
00288         Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): rowBegs contents to not agree with inds size.");
00289     this->clear();
00290     //
00291     indices1D_ = inds;
00292     rowBegs_ = rowBegs;
00293     rowEnds_ = rowEnds;
00294     if (numRows_ > 0) {
00295       for (size_t i=0; i < this->getNumRows(); ++i) {
00296         numEntries_ += (this->rowEnds_[i] - this->rowBegs_[i]);
00297 #ifdef HAVE_KOKKOS_DEBUG
00298         // row i goes like [ begs[i] , ends[i] )
00299         // sanity        : begs[i] <= ends[i]
00300         // ordering      : begs[i] <= begs[i+1]
00301         // no overlapping: ends[i] <= begs[i+1]
00302         TEST_FOR_EXCEPTION( rowBegs_[i+1] < rowBegs_[i] || rowEnds_[i] < rowBegs_[i] || rowEnds_[i] > rowBegs_[i+1], std::runtime_error,
00303             Teuchos::typeName(*this) << "::set1DStructure(inds,rowBegs,rowEnds): ends and begs are not consistent.");
00304 #endif
00305       }
00306     }
00307     is1D_ = true;
00308   }
00309 
00310   // ======= set 2d ===========
00311   template <class Ordinal, class Node, class LocalMatOps>
00312   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::set2DStructure(ArrayRCP<ArrayRCP<Ordinal> > inds,
00313                                                                      ArrayRCP<size_t>                      numEntriesPerRow)
00314   {
00315     TEST_FOR_EXCEPTION( (size_t)inds.size() != numRows_ || (size_t)numEntriesPerRow.size() != numRows_, std::runtime_error,
00316         Teuchos::typeName(*this) << "::set2DStructure(inds,numEntriesPerRow): numEntriesPerRow and inds must have as many entries as the number of rows specified to the constructor.");
00317     this->clear();
00318     //
00319     indices2D_  = inds;
00320     if (indices2D_ != null) {
00321       numEntriesPerRow_ = numEntriesPerRow;
00322       numEntries_ = std::accumulate(this->numEntriesPerRow_.begin(), this->numEntriesPerRow_.end(), 0);
00323 #ifdef HAVE_KOKKOS_DEBUG
00324       for (size_t i=0; i<numRows_; ++i) {
00325         TEST_FOR_EXCEPTION( (size_t)inds[i].size() < numEntriesPerRow[i], std::runtime_error,
00326             Teuchos::typeName(*this) << "::set2DStructure(): inds[" << i << "] == " << inds[i] 
00327             << " is not large enough for the specified number of entries, "
00328             << " numEntriesPerRow[" << i << "] == " << numEntriesPerRow[i]);
00329       }
00330 #endif
00331     }
00332     is2D_ = true;
00333   }
00334 
00335   // ======= finalize ===========
00336   template <class Ordinal, class Node, class LocalMatOps>
00337   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage)
00338   {
00339     // allocations not done using the Node. no current need for host-based nodes, and 
00340     // this leads to incorrect behavior when we try to reuse this code from child CrsGraphDeviceCompute
00341     if (isFinalized() && !(OptimizeStorage == true && isOptimized() == false)) return;
00342     if ((indices1D_ == null && indices2D_ == null) || (this->getNumEntries() == 0)) {
00343       isEmpty_ = true;
00344     }
00345     else {
00346       isEmpty_ = false;
00347       if (OptimizeStorage) {
00348         // move into packed 1D storage
00349         if (is1DStructure() == false) {
00350           // allocate 1D storage
00351           // we these are for host use, so we'll forgo the view
00352           indices1D_ = arcp<Ordinal>(this->getNumEntries());
00353         }
00354         ArrayRCP<size_t> offsets = arcp<size_t>(numRows_+1);
00355         // copy/pack data
00356         size_t curoffset = 0;
00357         size_t curnuminds;
00358         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00359         newinds = indices1D_.begin();
00360         for (size_t i=0; i < numRows_; ++i) {
00361           offsets[i] = curoffset;
00362           if (is1DStructure()) {
00363             curnuminds = rowEnds_[i] - rowBegs_[i];
00364             oldinds = indices1D_.begin() + rowBegs_[i];
00365           }
00366           else {
00367             curnuminds = numEntriesPerRow_[i];
00368             oldinds = indices2D_[i].begin();
00369           }
00370           std::copy(oldinds, oldinds+curnuminds, newinds);
00371           newinds += curnuminds;
00372           curoffset += curnuminds;
00373         }
00374         offsets[numRows_] = curoffset;
00375         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00376             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00377         // done with the original row beg/end offsets, can point to the new overlapping one
00378         rowBegs_   = offsets;
00379         rowEnds_   = offsets.persistingView(1,numRows_);
00380         isOpt_     = true;
00381         is1D_      = true;
00382         // delete 2D storage (if there was any)
00383         is2D_      = false;
00384         numEntriesPerRow_ = null;
00385         indices2D_        = null;
00386       }
00387     }
00388     isFinalized_ = true;
00389   }
00390 
00391 
00392   // ======= finalize ===========
00393   // finalize() storage for the graph with associated matrix values
00394   // this is called from a CrsMatrix, and we're doing the finalize for the graph and matrix at the same time, so the matrix doesn't have to.
00395   template <class Ordinal, class Node, class LocalMatOps>
00396   template <class Scalar>
00397   void CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D)
00398   {
00399     if (isFinalized() && !(OptimizeStorage == true && isOptimized() == false)) return;
00400     if ((indices1D_ == null && indices2D_ == null) || (this->getNumEntries() == 0)) {
00401       isEmpty_ = true;
00402     }
00403     else {
00404       isEmpty_ = false;
00405       // move into packed 1D storage
00406       if (OptimizeStorage) {
00407         if (is1DStructure() == false) {
00408           // allocate 1D storage
00409           // we know this is a host-base node, so we'll forgo the view of rowBegs_,rowEnds_
00410           indices1D_ = arcp<Ordinal>(this->getNumEntries());
00411           values1D   = arcp<Scalar >(this->getNumEntries());
00412         }
00413         ArrayRCP<size_t> offsets = arcp<size_t>(numRows_+1);
00414         // copy/pack data
00415         size_t curoffset = 0;
00416         size_t curnuminds;
00417         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00418         typename ArrayRCP<Scalar >::iterator oldvals, newvals;
00419         newinds = indices1D_.begin();
00420         newvals = values1D.begin();
00421         for (size_t i=0; i < numRows_; ++i) {
00422           offsets[i] = curoffset;
00423           if (is1DStructure()) {
00424             curnuminds = rowEnds_[i] - rowBegs_[i];
00425             oldinds = indices1D_.begin() + rowBegs_[i];
00426             oldvals = values1D.begin() + rowBegs_[i];
00427           }
00428           else {
00429             curnuminds = numEntriesPerRow_[i];
00430             oldinds = indices2D_[i].begin();
00431             oldvals = values2D[i].begin();
00432           }
00433           std::copy(oldinds, oldinds+curnuminds, newinds);
00434           std::copy(oldvals, oldvals+curnuminds, newvals);
00435           newinds += curnuminds;
00436           newvals += curnuminds;
00437           curoffset += curnuminds;
00438         }
00439         offsets[numRows_] = curoffset;
00440         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00441             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00442         // done with the original row beg/end offsets, can point to the new overlapping one
00443         rowBegs_   = offsets;
00444         rowEnds_   = offsets.persistingView(1,numRows_);
00445         is1D_      = true;
00446         isOpt_     = true;
00447         // delete 2D storage (if there was any)
00448         is2D_      = false;
00449         numEntriesPerRow_ = null;
00450         indices2D_        = null;
00451         values2D          = null;
00452       }
00453     }
00454     isFinalized_ = true;
00455   }
00456 
00457 
00458   //=========================================================================================================================
00459   // 
00460   // A device-resident CrsGraph
00461   // 
00462   //=========================================================================================================================
00463 
00464 
00472   template <class Ordinal, 
00473             class Node,
00474             class LocalMatOps>
00475   class CrsGraphDeviceCompute : public CrsGraphHostCompute<Ordinal,Node,LocalMatOps> {
00476   public:
00477 
00479 
00480 
00482     CrsGraphDeviceCompute(size_t numRows, const RCP<Node> &node);
00483 
00485     ~CrsGraphDeviceCompute();
00486 
00488 
00490 
00491 
00493 
00497     void finalize(bool OptimizeStorage);
00498 
00500 
00507     template <class Scalar>
00508     void finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &values2D, ArrayRCP<Scalar> &values1D, ArrayRCP<Scalar> &d_values1D);
00509 
00511     void getDeviceBuffers(ArrayRCP<Ordinal> &d_inds, ArrayRCP<size_t> &d_offs) const;
00512 
00514     virtual void clear();
00515 
00517 
00518   protected:
00520     CrsGraphDeviceCompute(const CrsGraphDeviceCompute& sources);
00521 
00522     // device storage (always 1D packed)
00523     ArrayRCP<Ordinal> pbuf_indices_;
00524     ArrayRCP<size_t > pbuf_offsets_;
00525   };
00526 
00527   //==============================================================================
00528   template <class Ordinal, class Node, class LocalMatOps>
00529   CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::CrsGraphDeviceCompute(size_t numRows, const RCP<Node> &node) 
00530   : CrsGraphHostCompute<Ordinal,Node,LocalMatOps>(numRows,node) 
00531   {}
00532 
00533   //===== destructor =====
00534   template <class Ordinal, class Node, class LocalMatOps>
00535   CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::~CrsGraphDeviceCompute() 
00536   {}
00537 
00538   //===== clear =====
00539   template <class Ordinal, class Node, class LocalMatOps>
00540   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::clear() { 
00541     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::clear();
00542     pbuf_indices_ = null;
00543     pbuf_offsets_ = null;
00544   }
00545 
00546   //==============================================================================
00547   template <class Ordinal, class Node, class LocalMatOps>
00548   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage)
00549   {
00550     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00551     // call "normal" finalize(). this handles the re-structuring of data. below, we will handle the movement to device.
00552     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(OptimizeStorage);
00553     // all we're doing here now is copying data to device.
00554     // copy into a 1D structure on the device, regardless of host format
00555     if (this->isEmpty()) {
00556       pbuf_indices_ = null;
00557       pbuf_offsets_ = null;
00558     }
00559     else {
00560       // allocate space on the device and copy data there, in a packed format
00561       pbuf_offsets_ = this->getNode()->template allocBuffer<size_t>(this->getNumRows()+1);
00562       pbuf_indices_ = this->getNode()->template allocBuffer<Ordinal>(this->getNumEntries());
00563       if (this->isOptimized()) {
00564         // should be packed now; single copy should do, and offsets are rowBegs_
00565         this->getNode()->template copyToBuffer<size_t >(this->getNumRows()+1, this->rowBegs_(),   pbuf_offsets_);
00566         this->getNode()->template copyToBuffer<Ordinal>(this->getNumEntries(),this->indices1D_(), pbuf_indices_);
00567       }
00568       else {
00569         ArrayRCP<size_t > view_offsets = this->getNode()->template viewBufferNonConst<size_t >(WriteOnly,pbuf_offsets_.size(),pbuf_offsets_);
00570         ArrayRCP<Ordinal> view_indices = this->getNode()->template viewBufferNonConst<Ordinal>(WriteOnly,pbuf_indices_.size(),pbuf_indices_);
00571         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00572         newinds = view_indices.begin();
00573         size_t curnuminds, curoffset = 0;
00574         for (size_t i=0; i < this->getNumRows(); ++i) {
00575           view_offsets[i] = curoffset;
00576           if (this->is1DStructure()) {
00577             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00578             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00579           }
00580           else {
00581             curnuminds = this->numEntriesPerRow_[i];
00582             oldinds = this->indices2D_[i].begin();
00583           }
00584           std::copy(oldinds, oldinds+curnuminds, newinds);
00585           newinds += curnuminds;
00586           curoffset += curnuminds;
00587         }
00588         view_offsets[this->getNumRows()] = curoffset;
00589         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00590             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00591         view_offsets = null;
00592         view_indices = null;
00593       }
00594     }
00595   }
00596 
00597   // ======= get device ===========
00598   template <class Ordinal, class Node, class LocalMatOps>
00599   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::getDeviceBuffers(ArrayRCP<Ordinal> &d_inds, ArrayRCP<size_t> &d_offs) const
00600   {
00601     d_inds = pbuf_indices_;
00602     d_offs = pbuf_offsets_;
00603   }
00604 
00605 
00606   //==============================================================================
00607   template <class Ordinal, class Node, class LocalMatOps>
00608   template <class Scalar>
00609   void CrsGraphDeviceCompute<Ordinal,Node,LocalMatOps>::finalize(bool OptimizeStorage, ArrayRCP<ArrayRCP<Scalar> > &h_vals2D, ArrayRCP<Scalar> &h_vals1D, ArrayRCP<Scalar> &d_valsPacked) 
00610   {
00611     if (this->isFinalized() && !(OptimizeStorage == true && this->isOptimized() == false)) return;
00612     // call "normal" finalize(). this handles the re-structuring of data. below, we will handle the movement to device.
00613     CrsGraphHostCompute<Ordinal,Node,LocalMatOps>::finalize(OptimizeStorage,h_vals2D,h_vals1D);
00614     if (this->isEmpty()) {
00615       pbuf_indices_ = null;
00616       pbuf_offsets_ = null;
00617       d_valsPacked  = null;
00618     }
00619     else {
00620       // allocate space on the device and copy data there, in a packed format
00621       pbuf_offsets_ = this->getNode()->template allocBuffer<size_t>(this->getNumRows()+1);
00622       pbuf_indices_ = this->getNode()->template allocBuffer<Ordinal>(this->getNumEntries());
00623       d_valsPacked  = this->getNode()->template allocBuffer<Scalar >(this->getNumEntries());
00624       if (this->isOptimized()) {
00625         // should be packed now; single copy should do, and offsets are rowBegs_
00626         this->getNode()->template copyToBuffer<size_t >(this->getNumRows()+1 ,  this->rowBegs_(), pbuf_offsets_);
00627         this->getNode()->template copyToBuffer<Ordinal>(this->getNumEntries(),this->indices1D_(), pbuf_indices_);
00628         this->getNode()->template copyToBuffer<Scalar >(this->getNumEntries(),        h_vals1D(), d_valsPacked);
00629       }
00630       else {
00631         ArrayRCP<size_t > view_offsets = this->getNode()->template viewBufferNonConst<size_t >(WriteOnly,pbuf_offsets_.size(),pbuf_offsets_);
00632         ArrayRCP<Ordinal> view_indices = this->getNode()->template viewBufferNonConst<Ordinal>(WriteOnly,pbuf_indices_.size(),pbuf_indices_);
00633         ArrayRCP<Scalar >  view_values = this->getNode()->template viewBufferNonConst<Scalar >(WriteOnly, d_valsPacked.size(),d_valsPacked);
00634         typename ArrayRCP<Ordinal>::iterator oldinds, newinds;
00635         typename ArrayRCP<Scalar >::iterator oldvals, newvals;
00636         newinds = view_indices.begin();
00637         newvals = view_values.begin();
00638         size_t curnuminds, curoffset = 0;
00639         for (size_t i=0; i < this->getNumRows(); ++i) {
00640           view_offsets[i] = curoffset;
00641           if (this->is1DStructure()) {
00642             curnuminds = this->rowEnds_[i] - this->rowBegs_[i];
00643             oldinds = this->indices1D_.begin() + this->rowBegs_[i];
00644             oldvals = h_vals1D.begin() + this->rowBegs_[i];
00645           }
00646           else {
00647             curnuminds = this->numEntriesPerRow_[i];
00648             oldinds = this->indices2D_[i].begin();
00649             oldvals = h_vals2D[i].begin();
00650           }
00651           std::copy(oldinds, oldinds+curnuminds, newinds);
00652           std::copy(oldvals, oldvals+curnuminds, newvals);
00653           newinds += curnuminds;
00654           newvals += curnuminds;
00655           curoffset += curnuminds;
00656         }
00657         view_offsets[this->getNumRows()] = curoffset;
00658         TEST_FOR_EXCEPTION( curoffset != this->getNumEntries(), std::logic_error, 
00659             Teuchos::typeName(*this) << "::finalize(): Internal logic error. Please contact Kokkos team.");
00660         view_offsets = null;
00661         view_indices = null;
00662         view_values  = null;
00663       }
00664     }
00665   }
00666 
00667   //=========================================================================================================================
00668   // 
00669   // Specializations
00670   // 
00671   //=========================================================================================================================
00672 
00678   template <class Ordinal, 
00679             class Node,
00680             class LocalMatOps>
00681   class CrsGraph : public CrsGraphHostCompute<Ordinal,Node,LocalMatOps> {
00682   public:
00683     CrsGraph(size_t numRows, const RCP<Node> &node) : CrsGraphHostCompute<Ordinal,Node,LocalMatOps>(numRows,node) {}
00684   private:
00685     CrsGraph(const CrsGraph<Ordinal,Node,LocalMatOps> &graph); // not implemented
00686   };
00687 
00693   template <class S, class O, class N> class DefaultDeviceSparseOps;
00694   template <class S,
00695             class Ordinal,
00696             class Node>
00697   class CrsGraph<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > : public CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > {
00698   public:
00699     CrsGraph(size_t numRows, const RCP<Node> &node) : CrsGraphDeviceCompute<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> >(numRows,node) {}
00700   private:
00701     CrsGraph(const CrsGraph<Ordinal,Node,DefaultDeviceSparseOps<S,Ordinal,Node> > &graph); // not implemented
00702   };
00703 
00704 } // namespace Kokkos
00705 
00706 #endif /* KOKKOS_CRSGRAPH_HPP */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends