Tpetra Matrix/Vector Services Version of the Day
Tpetra_VbrMatrix_def.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_VBRMATRIX_DEF_HPP
00030 #define TPETRA_VBRMATRIX_DEF_HPP
00031 
00032 #include <algorithm>
00033 #include <sstream>
00034 
00035 #include <Kokkos_NodeHelpers.hpp>
00036 #include <Tpetra_Vector.hpp>
00037 
00038 #ifdef DOXYGEN_USE_ONLY
00039 #include "Tpetra_VbrMatrix_decl.hpp"
00040 #endif
00041 
00042 namespace Tpetra {
00043 
00044 template<class Scalar,class Node>
00045 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value)
00046 {
00047   Kokkos::ReadyBufferHelper<Node> rbh(node);
00048   Kokkos::InitOp<Scalar> wdp;
00049   wdp.alpha = value;
00050   rbh.begin();
00051   wdp.x = rbh.addNonConstBuffer(ptr);
00052   rbh.end();
00053   node->template parallel_for<Kokkos::InitOp<Scalar> >(0, ptr.size(), wdp);
00054 }
00055 
00056 //-------------------------------------------------------------------
00057 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00058 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype)
00059  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)),
00060    blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))),
00061    constBlkGraph_(blkGraph_),
00062    lclMatrix_(blkRowMap->getNodeNumBlocks(), blkRowMap->getPointMap()->getNode()),
00063    pbuf_values1D_(),
00064    pbuf_indx_(),
00065    lclMatOps_(blkRowMap->getPointMap()->getNode()),
00066    importer_(),
00067    exporter_(),
00068    importedVec_(),
00069    exportedVec_(),
00070    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00071    nonlocal_data_(),
00072    is_fill_completed_(false),
00073    is_storage_optimized_(false)
00074 {
00075   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00076   //each entry in the graph corresponds to a block-entry in the matrix.
00077   //That is, you can think of a VBR matrix as a Crs matrix of dense
00078   //submatrices...
00079 }
00080 
00081 //-------------------------------------------------------------------
00082 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00083 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph)
00084  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())),
00085    blkGraph_(),
00086    constBlkGraph_(blkGraph),
00087    lclMatrix_(blkGraph->getBlockRowMap()->getNodeNumBlocks(),
00088               blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00089    pbuf_values1D_(),
00090    pbuf_indx_(),
00091    lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00092    importer_(),
00093    exporter_(),
00094    importedVec_(),
00095    exportedVec_(),
00096    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00097    nonlocal_data_(),
00098    is_fill_completed_(false),
00099    is_storage_optimized_(false)
00100 {
00101   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00102   //each entry in the graph corresponds to a block-entry in the matrix.
00103   //That is, you can think of a VBR matrix as a Crs matrix of dense
00104   //submatrices...
00105 
00106   TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error,
00107    "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true.");
00108 
00109   createImporterExporter();
00110 
00111   is_fill_completed_ = true;
00112 
00113   optimizeStorage();
00114 
00115   fillLocalMatrix();
00116   fillLocalMatVec();
00117 }
00118 
00119 //-------------------------------------------------------------------
00120 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00121 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~VbrMatrix()
00122 {
00123 }
00124 
00125 //-------------------------------------------------------------------
00126 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00127 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00128 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const
00129 {
00130   return getBlockDomainMap()->getPointMap();
00131 }
00132 
00133 //-------------------------------------------------------------------
00134 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00135 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00136 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const
00137 {
00138   return getBlockRangeMap()->getPointMap();
00139 }
00140 
00141 //-------------------------------------------------------------------
00142 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00143 bool
00144 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::isFillComplete() const
00145 {
00146   return is_fill_completed_;
00147 }
00148 
00149 //-------------------------------------------------------------------
00150 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00151 template <class DomainScalar, class RangeScalar>
00152 void
00153 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::multiply(const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp trans, RangeScalar alpha, RangeScalar beta) const
00154 {
00155   TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00156     "Tpetra::VbrMatrix::multiply ERROR, multiply may only be called after fillComplete has been called.");
00157 
00158   const Kokkos::MultiVector<Scalar,Node> *lclX = &X.getLocalMV();
00159   Kokkos::MultiVector<Scalar,Node>        *lclY = &Y.getLocalMVNonConst();
00160 
00161   lclMatOps_.template multiply<Scalar,Scalar>(trans,alpha,*lclX,beta,*lclY);
00162 }
00163 
00164 //-------------------------------------------------------------------
00165 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00166 template<class DomainScalar, class RangeScalar>
00167 void
00168 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::solve(const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, Teuchos::ETransp trans) const
00169 {
00170   TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error,
00171         "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride.");
00172 
00173   TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00174     "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called.");
00175 
00176   TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error,
00177     "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular.");
00178 
00179   const Kokkos::MultiVector<RangeScalar,Node> *lclY = &Y.getLocalMV();
00180   Kokkos::MultiVector<DomainScalar,Node>      *lclX = &X.getLocalMVNonConst();
00181 
00182   Teuchos::EUplo triang = constBlkGraph_->isUpperTriangular() ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI;
00183   Teuchos::EDiag diag = (constBlkGraph_->getNodeNumBlockDiags() < constBlkGraph_->getNodeNumBlockRows()) ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG;
00184 
00185   lclMatOps_.template solve<DomainScalar,RangeScalar>(trans,triang,diag, *lclY,*lclX);
00186 }
00187 
00188 //-------------------------------------------------------------------
00189 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00190 void
00191 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const
00192 {
00193   if (importer_ != Teuchos::null) {
00194     if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) {
00195       importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors()));
00196     }
00197 
00198     importedVec_->doImport(X, *importer_, REPLACE);
00199   }
00200 }
00201 
00202 //-------------------------------------------------------------------
00203 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00204 void
00205 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
00206 {
00207   if (exporter_ != Teuchos::null) {
00208     if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) {
00209       exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors()));
00210     }
00211 
00212     exportedVec_->doExport(Y, *exporter_, REPLACE);
00213   }
00214 }
00215 
00216 //-------------------------------------------------------------------
00217 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00218 void
00219 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
00220          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00221                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00222                Teuchos::ETransp trans,
00223                Scalar alpha,
00224                Scalar beta) const
00225 {
00226   if (trans == Teuchos::NO_TRANS) {
00227     updateImport(X);
00228     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00229     this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta);
00230   }
00231   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00232     updateImport(Y);
00233     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00234     this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta);
00235     if (importedVec_ != Teuchos::null) {
00236       Y.doExport(*importedVec_, *importer_, ADD);
00237     }
00238   }
00239 }
00240 
00241 //-------------------------------------------------------------------
00242 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00243 void
00244 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyInverse(
00245          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00246                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00247                Teuchos::ETransp trans) const
00248 {
00249   if (trans == Teuchos::NO_TRANS) {
00250     updateImport(Y);
00251     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00252     this->template solve<Scalar,Scalar>(Y, Xref, trans);
00253   }
00254   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00255     updateImport(Y);
00256     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00257     this->template solve<Scalar,Scalar>(Yref, X, trans);
00258     if (importedVec_ != Teuchos::null) {
00259       X.doExport(*importedVec_, *importer_, ADD);
00260     }
00261   }
00262 }
00263 
00264 //-------------------------------------------------------------------
00265 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00266 bool
00267 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const
00268 {
00269   return true;
00270 }
00271 
00272 //-------------------------------------------------------------------
00273 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00274 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00275 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRowMap() const
00276 {
00277   return constBlkGraph_->getBlockRowMap();
00278 }
00279 
00280 //-------------------------------------------------------------------
00281 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00282 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00283 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointRowMap() const
00284 {
00285   return constBlkGraph_->getBlockRowMap()->getPointMap();
00286 }
00287 
00288 //-------------------------------------------------------------------
00289 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00290 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00291 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockColMap() const
00292 {
00293   return constBlkGraph_->getBlockColMap();
00294 }
00295 
00296 //-------------------------------------------------------------------
00297 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00298 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00299 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockDomainMap() const
00300 {
00301   return constBlkGraph_->getBlockDomainMap();
00302 }
00303 
00304 //-------------------------------------------------------------------
00305 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00306 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00307 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRangeMap() const
00308 {
00309   return constBlkGraph_->getBlockRangeMap();
00310 }
00311 
00312 //-------------------------------------------------------------------
00313 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00314 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00315 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointColMap() const
00316 {
00317   return constBlkGraph_->getBlockColMap()->getPointMap();
00318 }
00319 
00320 //-------------------------------------------------------------------
00321 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00322 void
00323 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockRowView(
00324          GlobalOrdinal globalBlockRow,
00325          LocalOrdinal& numPtRows,
00326          Teuchos::ArrayView<const GlobalOrdinal>& blockCols,
00327          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00328          Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const
00329 {
00330   TEST_FOR_EXCEPTION(isFillComplete(), std::runtime_error,
00331       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be false.");
00332 
00333   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00334 
00335   LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00336   numPtRows = getBlockRowMap()->getLocalBlockSize(localRow);
00337   constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols);
00338   ptColsPerBlockCol.resize(blockCols.size());
00339   blockEntries.resize(blockCols.size());
00340   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00341     getGlobalBlockEntryView(globalBlockRow, blockCols[i],
00342                            numPtRows, ptColsPerBlockCol[i], blockEntries[i]);
00343   }
00344 }
00345 
00346 //-------------------------------------------------------------------
00347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00348 void
00349 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockRowView(
00350          LocalOrdinal localBlockRow,
00351          LocalOrdinal& numPtRows,
00352          Teuchos::ArrayView<const LocalOrdinal>& blockCols,
00353          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00354          Teuchos::ArrayRCP<const Scalar>& blockEntries) const
00355 {
00356   TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00357       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true.");
00358 
00359   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00360   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00361   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00362 
00363   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00364   constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols);
00365   ptColsPerBlockCol.resize(blockCols.size());
00366   LocalOrdinal num_scalars_in_block_row = 0;
00367   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00368     ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]);
00369     num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i];
00370   }
00371 
00372   Teuchos::RCP<Node> node = getNode();
00373   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00374   const size_t bindx_offset = bpr[localBlockRow];
00375   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset);
00376   const LocalOrdinal offset = indx[0];
00377   blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset);
00378 }
00379 
00380 //-------------------------------------------------------------------
00381 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00382 void
00383 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryViewNonConst(
00384     GlobalOrdinal globalBlockRow,
00385     GlobalOrdinal globalBlockCol,
00386     LocalOrdinal& numPtRows,
00387     LocalOrdinal& numPtCols,
00388     Teuchos::ArrayRCP<Scalar>& blockEntry)
00389 {
00390   //Return a non-const, read-write view of a block-entry (as an ArrayRCP),
00391   //creating/allocating the block-entry if it doesn't already exist, (but only
00392   //if fillComplete hasn't been called yet, and if the arguments numPtRows
00393   //and numPtCols have been set on input).
00394 
00395   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00396 
00397   if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
00398     VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_,
00399                    globalBlockRow, globalBlockCol,
00400                    numPtRows, numPtCols, blockEntry);
00401     return;
00402   }
00403 
00404   if (is_storage_optimized_) {
00405     TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00406       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false.");
00407 
00408     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00409 
00410     getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol,
00411                                    numPtRows, numPtCols, blockEntry);
00412     return;
00413   }
00414 
00415   //If we get to here, fillComplete hasn't been called yet, and the matrix data
00416   //is stored in un-packed '2D' form.
00417 
00418   if (data_2D_->size() == 0) {
00419     LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows();
00420     data_2D_->resize(numBlockRows);
00421   }
00422 
00423   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00424 
00425   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00426 
00427   if (col_iter != blkrow.end()) {
00428     blockEntry = col_iter->second;
00429   }
00430   else {
00431     //blockEntry doesn't already exist, so we will create it.
00432 
00433     //make sure block-size is specified:
00434     TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error,
00435       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0.");
00436 
00437     Teuchos::RCP<Node> node = getNode();
00438     size_t blockSize = numPtRows*numPtCols;
00439     blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize);
00440     std::fill(blockEntry.begin(), blockEntry.end(), 0);
00441     blkrow.insert(std::make_pair(globalBlockCol, blockEntry));
00442     blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1));
00443   }
00444 }
00445 
00446 //-------------------------------------------------------------------
00447 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00448 void
00449 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryView(
00450       GlobalOrdinal globalBlockRow,
00451       GlobalOrdinal globalBlockCol,
00452       LocalOrdinal& numPtRows,
00453       LocalOrdinal& numPtCols,
00454       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00455 {
00456   //This method returns a const-view of a block-entry (as an ArrayRCP).
00457   //Throws an exception if the block-entry doesn't already exist.
00458 
00459   if (is_storage_optimized_) {
00460     TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00461       "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false.");
00462 
00463     LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00464     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00465     getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry);
00466     return;
00467   }
00468 
00469   TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error,
00470     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view.");
00471 
00472   //this acts as a range-check for globalBlockRow:
00473   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00474   TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
00475      std::runtime_error,
00476      "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor.");
00477 
00478   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00479   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00480 
00481   if (col_iter == blkrow.end()) {
00482     throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist.");
00483   }
00484 
00485   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00486 
00487   TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error,
00488     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0.");
00489 
00490   blockEntry = col_iter->second;
00491   numPtCols = blockEntry.size() / numPtRows;
00492 }
00493 
00494 //-------------------------------------------------------------------
00495 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00496 void
00497 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryViewNonConst(
00498     LocalOrdinal localBlockRow,
00499     LocalOrdinal localBlockCol,
00500     LocalOrdinal& numPtRows,                     
00501     LocalOrdinal& numPtCols,
00502     Teuchos::ArrayRCP<Scalar>& blockEntry)
00503 {
00504   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00505   typedef Teuchos::ArrayRCP<const size_t> Host_View;
00506   typedef typename Host_View_LO::iterator ITER;
00507   //This method returns a non-constant view of a block-entry (as an ArrayRCP).
00508 
00509   TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00510    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called.");
00511 
00512   TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00513    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized.");
00514 
00515   Teuchos::RCP<Node> node = getNode();
00516 
00517   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
00518   LocalOrdinal bindx_offset = bptr[localBlockRow];
00519   LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset;
00520 
00521   TEST_FOR_EXCEPTION( length < 1, std::runtime_error,
00522     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow.");
00523 
00524   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00525   ITER bindx_beg = bindx.begin() + bindx_offset,  
00526        bindx_end = bindx_beg + length;
00527   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00528 
00529   TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00530     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found.");
00531 
00532   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00533   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00534 
00535   const LocalOrdinal blkSize = numPtRows*numPtCols;
00536   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg));
00537   const LocalOrdinal offset = indx[0];
00538   blockEntry = node->template viewBufferNonConst<Scalar>(Kokkos::ReadWrite, blkSize, pbuf_values1D_ + offset);
00539 }
00540 
00541 //-------------------------------------------------------------------
00542 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00543 void
00544 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalDiagCopy(
00545   Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const
00546 {
00547   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap();
00548   TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true,
00549     std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map.");
00550 
00551   Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst();
00552   Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs();
00553   size_t offset = 0;
00554   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00555   for(Tsize_t i=0; i<blockIDs.size(); ++i) {
00556     LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]);
00557     LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID);
00558     Teuchos::ArrayRCP<const Scalar> blockEntry;
00559     getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry);
00560 
00561     for(LocalOrdinal j=0; j<blockSize; ++j) {
00562       diag_view[offset++] = blockEntry[j*blockSize+j];
00563     }
00564   }
00565 }
00566 
00567 //-------------------------------------------------------------------
00568 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00569 bool
00570 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::checkSizes(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source)
00571 {
00572   bool ok = this->getMap()->getMinAllGlobalIndex() <= source.getMap()->getMinAllGlobalIndex();
00573   ok = ok && this->getMap()->getMaxAllGlobalIndex() >= source.getMap()->getMaxAllGlobalIndex();
00574   return ok;
00575 }
00576 
00577 //-------------------------------------------------------------------
00578 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00579 void
00580 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::copyAndPermute(
00581    const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source,
00582    size_t numSameIDs,
00583    const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
00584    const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
00585 {
00586   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00587 
00588   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source);
00589   if (src_mat_ptr == NULL) {
00590     throw std::runtime_error("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed.");
00591   }
00592   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr;
00593 
00594   Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00595   LocalOrdinal numPtRows;
00596 
00597   LocalOrdinal numSame = numSameIDs;
00598   for(LocalOrdinal i=0; i<numSame; ++i) {
00599     if (isFillComplete()) {
00600       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00601       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00602 
00603       src_mat.getLocalBlockRowView(i, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00604       unsigned offset = 0;
00605       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00606         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00607         setLocalBlockEntry(i, src_blkCols[j], numPtRows, numPtCols, numPtCols,
00608                            src_blkEntries.view(offset, numPtRows*numPtCols));
00609         offset += numPtRows*numPtCols;
00610       }
00611     }
00612     else {
00613       GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(i);
00614       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00615       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00616 
00617       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00618       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00619         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00620         setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols));
00621       }
00622     }
00623   }
00624 
00625   for(Tsize_t i=0; i<permuteToLIDs.size(); ++i) {
00626 
00627     if (isFillComplete()) {
00628       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00629       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00630 
00631       src_mat.getLocalBlockRowView(permuteFromLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00632       unsigned offset = 0;
00633       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00634         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00635         setLocalBlockEntry(permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols,
00636                            src_blkEntries.view(offset, numPtRows*numPtCols));
00637         offset += numPtRows*numPtCols;
00638       }
00639     }
00640     else {
00641       GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(permuteToLIDs[i]);
00642       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00643       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00644 
00645       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00646       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00647         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00648         setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols));
00649       }
00650     }
00651   }
00652 }
00653 
00654 //-------------------------------------------------------------------
00655 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00656 void
00657 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::packAndPrepare(
00658    const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source,
00659    const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00660    Teuchos::Array<char>& exports,
00661    const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00662    size_t& constantNumPackets,
00663    Distributor& distor)
00664 {
00665   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source);
00666   if (src_mat_ptr == NULL) {
00667     typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD;
00668     VDD* vdd = const_cast<VDD*>(dynamic_cast<const VDD*>(&source));
00669     if (vdd != NULL) {
00670       vdd->packAndPrepare(source, exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
00671     }
00672     else {
00673       throw std::runtime_error("VbrMatrix::packAndPrepare ERROR, dynamic_cast failed.");
00674     }
00675     return;
00676   }
00677 
00678   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr;
00679 
00680   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00681 
00682   //We will pack each row's data into the exports buffer as follows:
00683   //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}]
00684   //so the length of the char exports buffer for a row is:
00685   //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i)
00686 
00687   //For each row corresponding to exportLIDs, accumulate the size that it will
00688   //occupy in the exports buffer:
00689   size_t total_exports_size = 0;
00690   for(Tsize_t i=0; i<exportLIDs.size(); ++i) {
00691     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00692     LocalOrdinal numPtRows;
00693 
00694     LocalOrdinal numBlkCols = 0;
00695     LocalOrdinal numScalars = 0;
00696 
00697     if (src_mat.isFillComplete()) {
00698       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00699       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00700       src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows,
00701                             src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00702       numBlkCols = src_blkCols.size();
00703       numScalars = src_blkEntries.size();
00704     }
00705     else {//src_mat.isFillComplete()==false
00706       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00707       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00708       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00709       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00710       numBlkCols = src_blkCols.size();
00711       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00712         numScalars += src_blkEntries[j].size();
00713       }
00714     }
00715 
00716     size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols)
00717                          + sizeof(Scalar)*numScalars;
00718     numPacketsPerLID[i] = size_for_this_row;
00719     total_exports_size += size_for_this_row;
00720   }
00721 
00722   exports.resize(total_exports_size);
00723 
00724   ArrayView<char> avIndsC, avValsC;
00725   ArrayView<Scalar>        avVals;
00726 
00727   size_t offset = 0;
00728 
00729   for(Tsize_t i=0; i<exportLIDs.size(); ++i) {
00730     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00731     LocalOrdinal numPtRows;
00732 
00733     LocalOrdinal numBlkCols = 0;
00734     LocalOrdinal numScalars = 0;
00735 
00736     if (src_mat.isFillComplete()) {
00737       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00738       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00739       src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00740       numBlkCols = src_blkCols.size();
00741       numScalars = src_blkEntries.size();
00742 
00743       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(LocalOrdinal);
00744       //get export views
00745       avIndsC = exports(offset, num_chars_for_ordinals);
00746       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00747       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00748       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00749 
00750       const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map = src_mat.getBlockColMap();
00751 
00752       //put row info into the buffer views:
00753       *ind_it++ = numBlkCols;
00754       *ind_it++ = numPtRows;
00755       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00756         *ind_it++ = col_map->getGlobalBlockID(src_blkCols[j]);
00757       }
00758       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00759       avVals = av_reinterpret_cast<Scalar>(avValsC);
00760       std::copy(src_blkEntries.begin(), src_blkEntries.end(), avVals.begin());
00761     }
00762     else {//src_mat.isFillComplete()==false
00763       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00764       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00765       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00766       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00767       numBlkCols = src_blkCols.size();
00768       numScalars = 0;
00769       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00770         numScalars += numPtRows*src_ptColsPerBlkCol[j];
00771       }
00772 
00773       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00774       //get export views
00775       avIndsC = exports(offset, num_chars_for_ordinals);
00776       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00777       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00778       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00779 
00780       //put row info into the buffer views:
00781       *ind_it++ = numBlkCols;
00782       *ind_it++ = numPtRows;
00783       std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it);
00784       ind_it += src_blkCols.size();
00785       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00786       avVals = av_reinterpret_cast<Scalar>(avValsC);
00787       typename ArrayView<Scalar>::iterator val_it = avVals.begin();
00788       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00789         std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it);
00790         val_it += src_blkEntries[j].size();
00791       }
00792     }
00793 
00794     size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols)
00795                          + sizeof(Scalar)*numScalars;
00796     offset += size_for_this_row;
00797   }
00798 
00799   constantNumPackets = 0;
00800 }
00801 
00802 //-------------------------------------------------------------------
00803 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00804 void
00805 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::unpackAndCombine(
00806     const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
00807     const Teuchos::ArrayView<const char>& imports,
00808     const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00809     size_t constantNumPackets,
00810     Distributor& distor,
00811     CombineMode CM)
00812 {
00813   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00814  
00815   if (CM == Tpetra::ABSMAX) {
00816     std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00817   }
00818 
00819   size_t offset = 0;
00820   for(Tsize_t i=0; i<importLIDs.size(); ++i) {
00821     ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]);
00822     ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC);
00823     GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]);
00824     size_t avOrdsOffset = 0;
00825     LocalOrdinal numBlkCols = avOrds[avOrdsOffset++];
00826     LocalOrdinal numPtRows = avOrds[avOrdsOffset++];
00827     LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00828     ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals);
00829     ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC);
00830 
00831     size_t avValsOffset = 0;
00832     for(Tsize_t j=0; j<numBlkCols; ++j) {
00833       GlobalOrdinal blkCol = avOrds[avOrdsOffset+j];
00834       LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j];
00835       LocalOrdinal LDA = numPtRows;
00836       if (CM == Tpetra::ADD) {
00837         sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00838                          avVals.view(avValsOffset, numPtRows*numPtCols));
00839       }
00840       else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) {
00841         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00842                            avVals.view(avValsOffset, numPtRows*numPtCols));
00843       }
00844       else {
00845 //    std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00846         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00847                            avVals.view(avValsOffset, numPtRows*numPtCols));
00848       }
00849       avValsOffset += numPtRows*numPtCols;
00850     }
00851   }
00852 }
00853 
00854 //-------------------------------------------------------------------
00855 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00856 void
00857 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryView(
00858       LocalOrdinal localBlockRow,
00859       LocalOrdinal localBlockCol,
00860       LocalOrdinal& numPtRows,
00861       LocalOrdinal& numPtCols,
00862       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00863 {
00864   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00865   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00866   typedef typename Host_View_LO::iterator ITER;
00867   //This method returns a constant view of a block-entry (as an ArrayRCP).
00868 
00869   TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00870    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called.");
00871 
00872   TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00873    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized.");
00874 
00875   Teuchos::RCP<Node> node = getNode();
00876 
00877   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00878   const size_t bindx_offset = bpr[localBlockRow];
00879   const size_t length       = bpr[localBlockRow+1] - bindx_offset;
00880 
00881   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00882   ITER bindx_beg = bindx.begin()+bindx_offset, 
00883        bindx_end = bindx_beg + length;
00884   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00885 
00886   TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00887     "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found.");
00888 
00889   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00890   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00891 
00892   const LocalOrdinal blkSize = numPtRows*numPtCols;
00893   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg));
00894   const LocalOrdinal offset = indx[0];
00895   blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset);
00896 }
00897 
00898 //-------------------------------------------------------------------
00899 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00900 Teuchos::RCP<Node>
00901 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getNode() const
00902 {
00903   return getBlockRowMap()->getPointMap()->getNode();
00904 }
00905 
00906 //-------------------------------------------------------------------
00907 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00908 void
00909 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::putScalar(Scalar s)
00910 {
00911   Teuchos::RCP<Node> node = getNode();
00912 
00913   if (isFillComplete()) {
00914     fill_device_ArrayRCP(node, pbuf_values1D_, s);
00915   }
00916   else {
00917     typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t;
00918     Teuchos::Array<RowGlobalCols>& rows = *data_2D_;
00919     Tsize_t numBlockRows = rows.size();
00920     for(Tsize_t r=0; r<numBlockRows; ++r) {
00921       typename RowGlobalCols::iterator
00922         col_iter = rows[r].begin(),
00923         col_end  = rows[r].end();
00924       for(; col_iter != col_end; ++col_iter) {
00925         Teuchos::ArrayRCP<Scalar>& vals = col_iter->second;
00926         std::fill(vals.begin(), vals.end(), s);
00927       }
00928     }
00929   }
00930 }
00931 
00932 //-------------------------------------------------------------------
00933 template<class ArrayType1,class ArrayType2,class Ordinal>
00934 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode)
00935 {
00936   size_t offset = 0;
00937   size_t src_offset = 0;
00938 
00939   if (mode == ADD) {
00940     for(Ordinal col=0; col<cols; ++col) {
00941       for(Ordinal row=0; row<rows; ++row) {
00942         dest[offset++] += src[src_offset + row];
00943       }
00944       src_offset += stride;
00945     }
00946   }
00947   else {
00948     for(Ordinal col=0; col<cols; ++col) {
00949       for(Ordinal row=0; row<rows; ++row) {
00950         dest[offset++] = src[src_offset + row];
00951       }
00952       src_offset += stride;
00953     }
00954   }
00955 }
00956 
00957 //-------------------------------------------------------------------
00958 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00959 void
00960 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<GlobalOrdinal,Scalar>& blockEntry)
00961 {
00962   //first get an ArrayRCP for the internal storage for this block-entry:
00963   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
00964   LocalOrdinal blkRowSize = blockEntry.numRows();
00965   LocalOrdinal blkColSize = blockEntry.numCols();
00966   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
00967 
00968   //now copy the incoming block-entry into internal storage:
00969   const Scalar* inputvalues = blockEntry.values();
00970   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
00971 }
00972 
00973 //-------------------------------------------------------------------
00974 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00975 void
00976 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<LocalOrdinal,Scalar>& blockEntry)
00977 {
00978   //first get an ArrayRCP for the internal storage for this block-entry:
00979   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
00980   LocalOrdinal blkRowSize = blockEntry.numRows();
00981   LocalOrdinal blkColSize = blockEntry.numCols();
00982   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
00983 
00984   //now copy the incoming block-entry into internal storage:
00985   const Scalar* inputvalues = blockEntry.values();
00986   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
00987 }
00988 
00989 //-------------------------------------------------------------------
00990 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00991 void
00992 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<GlobalOrdinal,Scalar>& blockEntry)
00993 {
00994   //first get an ArrayRCP for the internal storage for this block-entry:
00995   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
00996   LocalOrdinal blkRowSize = blockEntry.numRows();
00997   LocalOrdinal blkColSize = blockEntry.numCols();
00998   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
00999 
01000   //now sum (add) the incoming block-entry into internal storage:
01001   const Scalar* inputvalues = blockEntry.values();
01002   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01003 }
01004 
01005 //-------------------------------------------------------------------
01006 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01007 void
01008 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<LocalOrdinal,Scalar>& blockEntry)
01009 {
01010   //first get an ArrayRCP for the internal storage for this block-entry:
01011   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01012   LocalOrdinal blkRowSize = blockEntry.numRows();
01013   LocalOrdinal blkColSize = blockEntry.numCols();
01014   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01015 
01016   //now sum (add) the incoming block-entry into internal storage:
01017   const Scalar* inputvalues = blockEntry.values();
01018   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01019 }
01020 
01021 //-------------------------------------------------------------------
01022 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01023 void
01024 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01025 {
01026   //first get an ArrayRCP for the internal storage for this block-entry:
01027   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01028   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01029 
01030   LocalOrdinal blk_size = blockEntry.size();
01031   TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01032     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01033 
01034   //copy the incoming block-entry into internal storage:
01035   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01036 }
01037 
01038 //-------------------------------------------------------------------
01039 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01040 void
01041 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01042 {
01043   //first get an ArrayRCP for the internal storage for this block-entry:
01044   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01045   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01046 
01047   LocalOrdinal blk_size = blockEntry.size();
01048   TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01049     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01050 
01051   //copy the incoming block-entry into internal storage:
01052   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01053 }
01054 
01055 //-------------------------------------------------------------------
01056 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01057 void
01058 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01059 {
01060   //first get an ArrayRCP for the internal storage for this block-entry:
01061   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01062   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01063 
01064   LocalOrdinal blk_size = blockEntry.size();
01065   TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01066     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01067 
01068   //copy the incoming block-entry into internal storage:
01069   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01070 }
01071 
01072 //-------------------------------------------------------------------
01073 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01074 void
01075 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01076 {
01077   //first get an ArrayRCP for the internal storage for this block-entry:
01078   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01079   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01080 
01081   LocalOrdinal blk_size = blockEntry.size();
01082   TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01083     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01084 
01085   //copy the incoming block-entry into internal storage:
01086   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01087 }
01088 
01089 //-------------------------------------------------------------------
01090 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01091 void
01092 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::optimizeStorage()
01093 {
01094   typedef Teuchos::ArrayRCP<const size_t> Host_View;
01095   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
01096   typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t;
01097 
01098   if (is_storage_optimized_ == true) return;
01099 
01100   TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error,
01101     "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called.");
01102 
01103   size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries();
01104 
01105   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap();
01106   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap();
01107 
01108   const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode();
01109 
01110   //need to count the number of point-entries:
01111   size_t num_point_entries = 0;
01112   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
01113 
01114   if (bptr.size() == 0) {
01115     is_storage_optimized_ = true;
01116     return;
01117   }
01118 
01119   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
01120 
01121   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01122     size_t rbeg = bptr[r];
01123     size_t rend = bptr[r+1];
01124 
01125     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01126 
01127     for(size_t j=rbeg; j<rend; ++j) {
01128       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]);
01129       num_point_entries += rsize*csize;
01130     }
01131   }
01132 
01133   pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1);
01134   pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries);
01135 
01136   Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(Kokkos::WriteOnly, num_block_nonzeros+1, pbuf_indx_);
01137   Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(Kokkos::WriteOnly, num_point_entries, pbuf_values1D_);
01138 
01139   bool have_2D_data = data_2D_->size() > 0;
01140   Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
01141 
01142   size_t ioffset = 0;
01143   size_t offset = 0;
01144   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01145     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01146 
01147     RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL;
01148 
01149     for(size_t c=bptr[r]; c<bptr[r+1]; ++c) {
01150       view_indx[ioffset++] = offset;
01151 
01152       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]);
01153       Tsize_t blkSize = rsize*csize;
01154 
01155       if (blk_row != NULL) {
01156         GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]);
01157         typename RowGlobalCols::iterator iter = blk_row->find(global_col);
01158         TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error,
01159           "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row.");
01160   
01161         Teuchos::ArrayRCP<Scalar> vals = iter->second;
01162         for(Tsize_t n=0; n<blkSize; ++n) {
01163           view_values1D[offset+n] = vals[n];
01164         }
01165       }
01166       else {
01167         for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero;
01168       }
01169       offset += blkSize;
01170     }
01171   }
01172   view_indx[ioffset] = offset;
01173 
01174   //Final step: release memory for the "2D" storage:
01175   data_2D_ = Teuchos::null;
01176 
01177   is_storage_optimized_ = true;
01178 }
01179 
01180 //-------------------------------------------------------------------
01181 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01182 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatrix()
01183 {
01184   //We insist that optimzeStorage has already been called.
01185   //We don't care whether this function (fillLocalMatrix()) is being
01186   //called for the first time or not.
01187   TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01188     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01189 
01190   Teuchos::ArrayRCP<const size_t      > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets();
01191   Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices();
01192   if (Node::isHostNode == false) {
01193     RCP<Node> node = getRangeMap()->getNode();
01194     //
01195     Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size());
01196     node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets);
01197     nodeRowOffsets = dev_nodeRowOffsets;
01198     //
01199     Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size());
01200     node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds);
01201     nodePackedInds = dev_nodePackedInds;
01202   }
01203   lclMatrix_.setPackedValues(pbuf_values1D_,
01204                              getBlockRowMap()->getNodeFirstPointInBlocks_Device(),
01205                              getBlockColMap()->getNodeFirstPointInBlocks_Device(),
01206                              nodeRowOffsets,
01207                              nodePackedInds,
01208                              pbuf_indx_);
01209 }
01210 
01211 //-------------------------------------------------------------------
01212 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01213 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatVec()
01214 {
01215   //We insist that optimzeStorage has already been called.
01216   //We don't care whether this function (fillLocalMatVec()) is being
01217   //called for the first time or not.
01218   TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01219     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01220 
01221   lclMatOps_.initializeValues(lclMatrix_);
01222 }
01223 
01224 //-------------------------------------------------------------------
01225 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01226 void
01227 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::createImporterExporter()
01228 {
01229   typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap;
01230 
01231   const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap();
01232   const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap();
01233   const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap();
01234   const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap();
01235 
01236   if (!ptDomMap->isSameAs(*ptColMap)) {
01237     importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap));
01238   }
01239   if (!ptRngMap->isSameAs(*ptRowMap)) {
01240     exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap));
01241   }
01242 }
01243 
01244 //-------------------------------------------------------------------
01245 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01246 void
01247 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap)
01248 {
01249   if (isFillComplete()) return;
01250 
01251   globalAssemble();
01252 
01253   blkGraph_->fillComplete(blockDomainMap, blockRangeMap);
01254 
01255   createImporterExporter();
01256 
01257   is_fill_completed_ = true;
01258 
01259   optimizeStorage();
01260 
01261   fillLocalMatrix();
01262   fillLocalMatVec();
01263 }
01264 
01265 //-------------------------------------------------------------------
01266 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01267 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete()
01268 {
01269   //In this case, our block-row-map will also be our domain-map and
01270   //range-map.
01271 
01272   fillComplete(getBlockRowMap(), getBlockRowMap());
01273 }
01274 
01275 //-------------------------------------------------------------------
01276 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01277 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::globalAssemble()
01278 {
01279   if (getPointRowMap()->getComm()->getSize() == 1) {
01280     return;
01281   }
01282 
01283   //nonlocal_data_ contains data that belongs on other processors.
01284   //We'll refer to this as overlapping data, and create an 'overlapMap'
01285   //that describes the layout of this overlapping data.
01286   Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
01287      overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap());
01288 
01289   if (overlapMap->getGlobalNumBlocks() == 0) {
01290     return;
01291   }
01292 
01293   //VbrDataDist is a wrapper that makes our overlapping data behave
01294   //like a DistObject so that the overlapping data can be exported
01295   //to the owning processors in a standard way.
01296   VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node>
01297       vdd(nonlocal_data_, *overlapMap, *this->getBlockRowMap());
01298 
01299   //Create an exporter where the source map is our overlapMap and the
01300   //target map is the rowmap of our VbrMatrix.
01301   Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>
01302       importer( vdd.getMap(), convertBlockMapToPointMap(*getBlockRowMap()));
01303 
01304   //Communicate the overlapping data to the owning processors and add it
01305   //into this VbrMatrix.
01306   this->doImport(vdd, importer, Tpetra::ADD);
01307 
01308   //Zero out the overlapping data so it can be re-populated and re-assembled
01309   //in future calls to globalAssemble.
01310   VbrUtils::zeroEntries(nonlocal_data_);
01311 }
01312 
01313 //-------------------------------------------------------------------
01314 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01315 std::string
01316 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::description() const
01317 {
01318   std::ostringstream oss;
01319   oss << Teuchos::Describable::description();
01320   if (isFillComplete()) {
01321     oss << "{status = fill complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}";
01322   }
01323   else {
01324     oss << "{status = fill not complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}";
01325   }
01326   return oss.str();
01327 }
01328 
01329 //-------------------------------------------------------------------
01330 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01331 void
01332 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
01333 {
01334   Teuchos::EVerbosityLevel vl = verbLevel;
01335   if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW;
01336 
01337   if (vl == Teuchos::VERB_NONE) return;
01338 
01339   Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm();
01340   const int myProc = comm->getRank();
01341 
01342   if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl;
01343 }
01344 
01345 }//namespace Tpetra
01346 
01347 //
01348 // Explicit instantiation macro
01349 //
01350 // Must be expanded from within the Tpetra namespace!
01351 //
01352 
01353 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01354   \
01355   template class VbrMatrix< SCALAR , LO , GO , NODE >;
01356 
01357 #endif //TPETRA_VBRMATRIX_DEF_HPP
01358 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines