Tpetra Matrix/Vector Services Version of the Day
Tpetra_VbrMatrix_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP
00043 #define TPETRA_VBRMATRIX_DEF_HPP
00044 
00045 #include <algorithm>
00046 #include <sstream>
00047 
00048 #include <Kokkos_NodeHelpers.hpp>
00049 #include <Tpetra_Vector.hpp>
00050 
00051 #ifdef DOXYGEN_USE_ONLY
00052 #include "Tpetra_VbrMatrix_decl.hpp"
00053 #endif
00054 
00055 namespace Tpetra {
00056 
00057 template<class Scalar,class Node>
00058 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value)
00059 {
00060   Kokkos::ReadyBufferHelper<Node> rbh(node);
00061   Kokkos::InitOp<Scalar> wdp;
00062   wdp.alpha = value;
00063   rbh.begin();
00064   wdp.x = rbh.addNonConstBuffer(ptr);
00065   rbh.end();
00066   node->template parallel_for<Kokkos::InitOp<Scalar> >(0, ptr.size(), wdp);
00067 }
00068 
00069 //-------------------------------------------------------------------
00070 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00071 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype)
00072  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)),
00073    blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))),
00074    constBlkGraph_(blkGraph_),
00075    lclMatrix_(blkRowMap->getNodeNumBlocks(), blkRowMap->getPointMap()->getNode()),
00076    pbuf_values1D_(),
00077    pbuf_indx_(),
00078    lclMatOps_(blkRowMap->getPointMap()->getNode()),
00079    importer_(),
00080    exporter_(),
00081    importedVec_(),
00082    exportedVec_(),
00083    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00084    nonlocal_data_(),
00085    is_fill_completed_(false),
00086    is_storage_optimized_(false)
00087 {
00088   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00089   //each entry in the graph corresponds to a block-entry in the matrix.
00090   //That is, you can think of a VBR matrix as a Crs matrix of dense
00091   //submatrices...
00092 }
00093 
00094 //-------------------------------------------------------------------
00095 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00096 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph)
00097  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())),
00098    blkGraph_(),
00099    constBlkGraph_(blkGraph),
00100    lclMatrix_(blkGraph->getBlockRowMap()->getNodeNumBlocks(),
00101               blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00102    pbuf_values1D_(),
00103    pbuf_indx_(),
00104    lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00105    importer_(),
00106    exporter_(),
00107    importedVec_(),
00108    exportedVec_(),
00109    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00110    nonlocal_data_(),
00111    is_fill_completed_(false),
00112    is_storage_optimized_(false)
00113 {
00114   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00115   //each entry in the graph corresponds to a block-entry in the matrix.
00116   //That is, you can think of a VBR matrix as a Crs matrix of dense
00117   //submatrices...
00118 
00119   TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error,
00120    "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true.");
00121 
00122   createImporterExporter();
00123 
00124   is_fill_completed_ = true;
00125 
00126   optimizeStorage();
00127 
00128   fillLocalMatrix();
00129   fillLocalMatVec();
00130 }
00131 
00132 //-------------------------------------------------------------------
00133 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00134 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~VbrMatrix()
00135 {
00136 }
00137 
00138 //-------------------------------------------------------------------
00139 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00140 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00141 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const
00142 {
00143   return getBlockDomainMap()->getPointMap();
00144 }
00145 
00146 //-------------------------------------------------------------------
00147 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00148 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00149 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const
00150 {
00151   return getBlockRangeMap()->getPointMap();
00152 }
00153 
00154 //-------------------------------------------------------------------
00155 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00156 bool
00157 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::isFillComplete() const
00158 {
00159   return is_fill_completed_;
00160 }
00161 
00162 //-------------------------------------------------------------------
00163 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00164 template <class DomainScalar, class RangeScalar>
00165 void
00166 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::multiply(const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp trans, RangeScalar alpha, RangeScalar beta) const
00167 {
00168   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00169     "Tpetra::VbrMatrix::multiply ERROR, multiply may only be called after fillComplete has been called.");
00170 
00171   const Kokkos::MultiVector<Scalar,Node> *lclX = &X.getLocalMV();
00172   Kokkos::MultiVector<Scalar,Node>        *lclY = &Y.getLocalMVNonConst();
00173 
00174   lclMatOps_.template multiply<Scalar,Scalar>(trans,alpha,*lclX,beta,*lclY);
00175 }
00176 
00177 //-------------------------------------------------------------------
00178 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00179 template<class DomainScalar, class RangeScalar>
00180 void
00181 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::solve(const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, Teuchos::ETransp trans) const
00182 {
00183   TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error,
00184         "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride.");
00185 
00186   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00187     "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called.");
00188 
00189   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error,
00190     "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular.");
00191 
00192   const Kokkos::MultiVector<RangeScalar,Node> *lclY = &Y.getLocalMV();
00193   Kokkos::MultiVector<DomainScalar,Node>      *lclX = &X.getLocalMVNonConst();
00194 
00195   Teuchos::EUplo triang = constBlkGraph_->isUpperTriangular() ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI;
00196   Teuchos::EDiag diag = (constBlkGraph_->getNodeNumBlockDiags() < constBlkGraph_->getNodeNumBlockRows()) ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG;
00197 
00198   lclMatOps_.template solve<DomainScalar,RangeScalar>(trans,triang,diag, *lclY,*lclX);
00199 }
00200 
00201 //-------------------------------------------------------------------
00202 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00203 void
00204 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const
00205 {
00206   if (importer_ != Teuchos::null) {
00207     if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) {
00208       importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors()));
00209     }
00210 
00211     importedVec_->doImport(X, *importer_, REPLACE);
00212   }
00213 }
00214 
00215 //-------------------------------------------------------------------
00216 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00217 void
00218 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
00219 {
00220   if (exporter_ != Teuchos::null) {
00221     if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) {
00222       exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors()));
00223     }
00224 
00225     exportedVec_->doExport(Y, *exporter_, REPLACE);
00226   }
00227 }
00228 
00229 //-------------------------------------------------------------------
00230 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00231 void
00232 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
00233          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00234                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00235                Teuchos::ETransp trans,
00236                Scalar alpha,
00237                Scalar beta) const
00238 {
00239   if (trans == Teuchos::NO_TRANS) {
00240     updateImport(X);
00241     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00242     this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta);
00243   }
00244   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00245     updateImport(Y);
00246     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00247     this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta);
00248     if (importedVec_ != Teuchos::null) {
00249       Y.doExport(*importedVec_, *importer_, ADD);
00250     }
00251   }
00252 }
00253 
00254 //-------------------------------------------------------------------
00255 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00256 void
00257 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyInverse(
00258          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00259                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00260                Teuchos::ETransp trans) const
00261 {
00262   if (trans == Teuchos::NO_TRANS) {
00263     updateImport(Y);
00264     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00265     this->template solve<Scalar,Scalar>(Y, Xref, trans);
00266   }
00267   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00268     updateImport(Y);
00269     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00270     this->template solve<Scalar,Scalar>(Yref, X, trans);
00271     if (importedVec_ != Teuchos::null) {
00272       X.doExport(*importedVec_, *importer_, ADD);
00273     }
00274   }
00275 }
00276 
00277 //-------------------------------------------------------------------
00278 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00279 bool
00280 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const
00281 {
00282   return true;
00283 }
00284 
00285 //-------------------------------------------------------------------
00286 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00287 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00288 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRowMap() const
00289 {
00290   return constBlkGraph_->getBlockRowMap();
00291 }
00292 
00293 //-------------------------------------------------------------------
00294 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00295 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00296 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointRowMap() const
00297 {
00298   return constBlkGraph_->getBlockRowMap()->getPointMap();
00299 }
00300 
00301 //-------------------------------------------------------------------
00302 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00303 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00304 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockColMap() const
00305 {
00306   return constBlkGraph_->getBlockColMap();
00307 }
00308 
00309 //-------------------------------------------------------------------
00310 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00311 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00312 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockDomainMap() const
00313 {
00314   return constBlkGraph_->getBlockDomainMap();
00315 }
00316 
00317 //-------------------------------------------------------------------
00318 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00319 const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &
00320 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRangeMap() const
00321 {
00322   return constBlkGraph_->getBlockRangeMap();
00323 }
00324 
00325 //-------------------------------------------------------------------
00326 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00327 const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &
00328 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointColMap() const
00329 {
00330   return constBlkGraph_->getBlockColMap()->getPointMap();
00331 }
00332 
00333 //-------------------------------------------------------------------
00334 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00335 void
00336 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockRowView(
00337          GlobalOrdinal globalBlockRow,
00338          LocalOrdinal& numPtRows,
00339          Teuchos::ArrayView<const GlobalOrdinal>& blockCols,
00340          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00341          Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const
00342 {
00343   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete(), std::runtime_error,
00344       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be false.");
00345 
00346   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00347 
00348   LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00349   numPtRows = getBlockRowMap()->getLocalBlockSize(localRow);
00350   constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols);
00351   ptColsPerBlockCol.resize(blockCols.size());
00352   blockEntries.resize(blockCols.size());
00353   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00354     getGlobalBlockEntryView(globalBlockRow, blockCols[i],
00355                            numPtRows, ptColsPerBlockCol[i], blockEntries[i]);
00356   }
00357 }
00358 
00359 //-------------------------------------------------------------------
00360 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00361 void
00362 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockRowView(
00363          LocalOrdinal localBlockRow,
00364          LocalOrdinal& numPtRows,
00365          Teuchos::ArrayView<const LocalOrdinal>& blockCols,
00366          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00367          Teuchos::ArrayRCP<const Scalar>& blockEntries) const
00368 {
00369   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00370       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true.");
00371 
00372   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00373   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00374   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00375 
00376   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00377   constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols);
00378   ptColsPerBlockCol.resize(blockCols.size());
00379   LocalOrdinal num_scalars_in_block_row = 0;
00380   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00381     ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]);
00382     num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i];
00383   }
00384 
00385   Teuchos::RCP<Node> node = getNode();
00386   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00387   const size_t bindx_offset = bpr[localBlockRow];
00388   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset);
00389   const LocalOrdinal offset = indx[0];
00390   blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset);
00391 }
00392 
00393 //-------------------------------------------------------------------
00394 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00395 void
00396 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryViewNonConst(
00397     GlobalOrdinal globalBlockRow,
00398     GlobalOrdinal globalBlockCol,
00399     LocalOrdinal& numPtRows,
00400     LocalOrdinal& numPtCols,
00401     Teuchos::ArrayRCP<Scalar>& blockEntry)
00402 {
00403   //Return a non-const, read-write view of a block-entry (as an ArrayRCP),
00404   //creating/allocating the block-entry if it doesn't already exist, (but only
00405   //if fillComplete hasn't been called yet, and if the arguments numPtRows
00406   //and numPtCols have been set on input).
00407 
00408   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00409 
00410   if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
00411     VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_,
00412                    globalBlockRow, globalBlockCol,
00413                    numPtRows, numPtCols, blockEntry);
00414     return;
00415   }
00416 
00417   if (is_storage_optimized_) {
00418     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00419       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false.");
00420 
00421     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00422 
00423     getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol,
00424                                    numPtRows, numPtCols, blockEntry);
00425     return;
00426   }
00427 
00428   //If we get to here, fillComplete hasn't been called yet, and the matrix data
00429   //is stored in un-packed '2D' form.
00430 
00431   if (data_2D_->size() == 0) {
00432     LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows();
00433     data_2D_->resize(numBlockRows);
00434   }
00435 
00436   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00437 
00438   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00439 
00440   if (col_iter != blkrow.end()) {
00441     blockEntry = col_iter->second;
00442   }
00443   else {
00444     //blockEntry doesn't already exist, so we will create it.
00445 
00446     //make sure block-size is specified:
00447     TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error,
00448       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0.");
00449 
00450     Teuchos::RCP<Node> node = getNode();
00451     size_t blockSize = numPtRows*numPtCols;
00452     blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize);
00453     std::fill(blockEntry.begin(), blockEntry.end(), 0);
00454     blkrow.insert(std::make_pair(globalBlockCol, blockEntry));
00455     blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1));
00456   }
00457 }
00458 
00459 //-------------------------------------------------------------------
00460 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00461 void
00462 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryView(
00463       GlobalOrdinal globalBlockRow,
00464       GlobalOrdinal globalBlockCol,
00465       LocalOrdinal& numPtRows,
00466       LocalOrdinal& numPtCols,
00467       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00468 {
00469   //This method returns a const-view of a block-entry (as an ArrayRCP).
00470   //Throws an exception if the block-entry doesn't already exist.
00471 
00472   if (is_storage_optimized_) {
00473     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00474       "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false.");
00475 
00476     LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00477     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00478     getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry);
00479     return;
00480   }
00481 
00482   TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error,
00483     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view.");
00484 
00485   //this acts as a range-check for globalBlockRow:
00486   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00487   TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
00488      std::runtime_error,
00489      "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor.");
00490 
00491   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00492   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00493 
00494   if (col_iter == blkrow.end()) {
00495     throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist.");
00496   }
00497 
00498   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00499 
00500   TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error,
00501     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0.");
00502 
00503   blockEntry = col_iter->second;
00504   numPtCols = blockEntry.size() / numPtRows;
00505 }
00506 
00507 //-------------------------------------------------------------------
00508 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00509 void
00510 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryViewNonConst(
00511     LocalOrdinal localBlockRow,
00512     LocalOrdinal localBlockCol,
00513     LocalOrdinal& numPtRows,                     
00514     LocalOrdinal& numPtCols,
00515     Teuchos::ArrayRCP<Scalar>& blockEntry)
00516 {
00517   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00518   typedef Teuchos::ArrayRCP<const size_t> Host_View;
00519   typedef typename Host_View_LO::iterator ITER;
00520   //This method returns a non-constant view of a block-entry (as an ArrayRCP).
00521 
00522   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00523    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called.");
00524 
00525   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00526    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized.");
00527 
00528   Teuchos::RCP<Node> node = getNode();
00529 
00530   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
00531   LocalOrdinal bindx_offset = bptr[localBlockRow];
00532   LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset;
00533 
00534   TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error,
00535     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow.");
00536 
00537   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00538   ITER bindx_beg = bindx.begin() + bindx_offset,  
00539        bindx_end = bindx_beg + length;
00540   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00541 
00542   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00543     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found.");
00544 
00545   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00546   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00547 
00548   const LocalOrdinal blkSize = numPtRows*numPtCols;
00549   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg));
00550   const LocalOrdinal offset = indx[0];
00551   blockEntry = node->template viewBufferNonConst<Scalar>(Kokkos::ReadWrite, blkSize, pbuf_values1D_ + offset);
00552 }
00553 
00554 //-------------------------------------------------------------------
00555 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00556 void
00557 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalDiagCopy(
00558   Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const
00559 {
00560   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap();
00561   TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true,
00562     std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map.");
00563 
00564   Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst();
00565   Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs();
00566   size_t offset = 0;
00567   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00568   for(Tsize_t i=0; i<blockIDs.size(); ++i) {
00569     LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]);
00570     LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID);
00571     Teuchos::ArrayRCP<const Scalar> blockEntry;
00572     getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry);
00573 
00574     for(LocalOrdinal j=0; j<blockSize; ++j) {
00575       diag_view[offset++] = blockEntry[j*blockSize+j];
00576     }
00577   }
00578 }
00579 
00580 //-------------------------------------------------------------------
00581 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00582 bool
00583 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::checkSizes(const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source)
00584 {
00585   bool ok = this->getMap()->getMinAllGlobalIndex() <= source.getMap()->getMinAllGlobalIndex();
00586   ok = ok && this->getMap()->getMaxAllGlobalIndex() >= source.getMap()->getMaxAllGlobalIndex();
00587   return ok;
00588 }
00589 
00590 //-------------------------------------------------------------------
00591 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00592 void
00593 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::copyAndPermute(
00594    const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source,
00595    size_t numSameIDs,
00596    const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
00597    const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
00598 {
00599   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00600 
00601   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source);
00602   if (src_mat_ptr == NULL) {
00603     throw std::runtime_error("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed.");
00604   }
00605   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr;
00606 
00607   Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00608   LocalOrdinal numPtRows;
00609 
00610   LocalOrdinal numSame = numSameIDs;
00611   for(LocalOrdinal i=0; i<numSame; ++i) {
00612     if (isFillComplete()) {
00613       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00614       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00615 
00616       src_mat.getLocalBlockRowView(i, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00617       unsigned offset = 0;
00618       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00619         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00620         setLocalBlockEntry(i, src_blkCols[j], numPtRows, numPtCols, numPtCols,
00621                            src_blkEntries.view(offset, numPtRows*numPtCols));
00622         offset += numPtRows*numPtCols;
00623       }
00624     }
00625     else {
00626       GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(i);
00627       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00628       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00629 
00630       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00631       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00632         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00633         setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols));
00634       }
00635     }
00636   }
00637 
00638   for(Tsize_t i=0; i<permuteToLIDs.size(); ++i) {
00639 
00640     if (isFillComplete()) {
00641       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00642       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00643 
00644       src_mat.getLocalBlockRowView(permuteFromLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00645       unsigned offset = 0;
00646       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00647         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00648         setLocalBlockEntry(permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols,
00649                            src_blkEntries.view(offset, numPtRows*numPtCols));
00650         offset += numPtRows*numPtCols;
00651       }
00652     }
00653     else {
00654       GlobalOrdinal gRow = getBlockRowMap()->getGlobalBlockID(permuteToLIDs[i]);
00655       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00656       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00657 
00658       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00659       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00660         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00661         setGlobalBlockEntry(gRow, src_blkCols[j], numPtRows, numPtCols, numPtRows, src_blkEntries[j].view(0, numPtRows*numPtCols));
00662       }
00663     }
00664   }
00665 }
00666 
00667 //-------------------------------------------------------------------
00668 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00669 void
00670 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::packAndPrepare(
00671    const DistObject<char, LocalOrdinal, GlobalOrdinal, Node>& source,
00672    const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00673    Teuchos::Array<char>& exports,
00674    const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00675    size_t& constantNumPackets,
00676    Distributor& distor)
00677 {
00678   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>* src_mat_ptr = dynamic_cast<const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>*>(&source);
00679   if (src_mat_ptr == NULL) {
00680     typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD;
00681     VDD* vdd = const_cast<VDD*>(dynamic_cast<const VDD*>(&source));
00682     if (vdd != NULL) {
00683       vdd->packAndPrepare(source, exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
00684     }
00685     else {
00686       throw std::runtime_error("VbrMatrix::packAndPrepare ERROR, dynamic_cast failed.");
00687     }
00688     return;
00689   }
00690 
00691   const VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>& src_mat = *src_mat_ptr;
00692 
00693   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00694 
00695   //We will pack each row's data into the exports buffer as follows:
00696   //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}]
00697   //so the length of the char exports buffer for a row is:
00698   //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i)
00699 
00700   //For each row corresponding to exportLIDs, accumulate the size that it will
00701   //occupy in the exports buffer:
00702   size_t total_exports_size = 0;
00703   for(Tsize_t i=0; i<exportLIDs.size(); ++i) {
00704     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00705     LocalOrdinal numPtRows;
00706 
00707     LocalOrdinal numBlkCols = 0;
00708     LocalOrdinal numScalars = 0;
00709 
00710     if (src_mat.isFillComplete()) {
00711       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00712       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00713       src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows,
00714                             src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00715       numBlkCols = src_blkCols.size();
00716       numScalars = src_blkEntries.size();
00717     }
00718     else {//src_mat.isFillComplete()==false
00719       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00720       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00721       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00722       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00723       numBlkCols = src_blkCols.size();
00724       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00725         numScalars += src_blkEntries[j].size();
00726       }
00727     }
00728 
00729     size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols)
00730                          + sizeof(Scalar)*numScalars;
00731     numPacketsPerLID[i] = size_for_this_row;
00732     total_exports_size += size_for_this_row;
00733   }
00734 
00735   exports.resize(total_exports_size);
00736 
00737   ArrayView<char> avIndsC, avValsC;
00738   ArrayView<Scalar>        avVals;
00739 
00740   size_t offset = 0;
00741 
00742   for(Tsize_t i=0; i<exportLIDs.size(); ++i) {
00743     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00744     LocalOrdinal numPtRows;
00745 
00746     LocalOrdinal numBlkCols = 0;
00747     LocalOrdinal numScalars = 0;
00748 
00749     if (src_mat.isFillComplete()) {
00750       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00751       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00752       src_mat.getLocalBlockRowView(exportLIDs[i], numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00753       numBlkCols = src_blkCols.size();
00754       numScalars = src_blkEntries.size();
00755 
00756       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(LocalOrdinal);
00757       //get export views
00758       avIndsC = exports(offset, num_chars_for_ordinals);
00759       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00760       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00761       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00762 
00763       const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map = src_mat.getBlockColMap();
00764 
00765       //put row info into the buffer views:
00766       *ind_it++ = numBlkCols;
00767       *ind_it++ = numPtRows;
00768       for(Tsize_t j=0; j<src_blkCols.size(); ++j) {
00769         *ind_it++ = col_map->getGlobalBlockID(src_blkCols[j]);
00770       }
00771       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00772       avVals = av_reinterpret_cast<Scalar>(avValsC);
00773       std::copy(src_blkEntries.begin(), src_blkEntries.end(), avVals.begin());
00774     }
00775     else {//src_mat.isFillComplete()==false
00776       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00777       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00778       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00779       src_mat.getGlobalBlockRowView(gRow, numPtRows, src_blkCols, src_ptColsPerBlkCol, src_blkEntries);
00780       numBlkCols = src_blkCols.size();
00781       numScalars = 0;
00782       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00783         numScalars += numPtRows*src_ptColsPerBlkCol[j];
00784       }
00785 
00786       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00787       //get export views
00788       avIndsC = exports(offset, num_chars_for_ordinals);
00789       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00790       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00791       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00792 
00793       //put row info into the buffer views:
00794       *ind_it++ = numBlkCols;
00795       *ind_it++ = numPtRows;
00796       std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it);
00797       ind_it += src_blkCols.size();
00798       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00799       avVals = av_reinterpret_cast<Scalar>(avValsC);
00800       typename ArrayView<Scalar>::iterator val_it = avVals.begin();
00801       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00802         std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it);
00803         val_it += src_blkEntries[j].size();
00804       }
00805     }
00806 
00807     size_t size_for_this_row = sizeof(GlobalOrdinal)*(2+2*numBlkCols)
00808                          + sizeof(Scalar)*numScalars;
00809     offset += size_for_this_row;
00810   }
00811 
00812   constantNumPackets = 0;
00813 }
00814 
00815 //-------------------------------------------------------------------
00816 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00817 void
00818 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::unpackAndCombine(
00819     const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
00820     const Teuchos::ArrayView<const char>& imports,
00821     const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00822     size_t constantNumPackets,
00823     Distributor& distor,
00824     CombineMode CM)
00825 {
00826   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00827  
00828   if (CM == Tpetra::ABSMAX) {
00829     std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00830   }
00831 
00832   size_t offset = 0;
00833   for(Tsize_t i=0; i<importLIDs.size(); ++i) {
00834     ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]);
00835     ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC);
00836     GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]);
00837     size_t avOrdsOffset = 0;
00838     LocalOrdinal numBlkCols = avOrds[avOrdsOffset++];
00839     LocalOrdinal numPtRows = avOrds[avOrdsOffset++];
00840     LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00841     ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals);
00842     ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC);
00843 
00844     size_t avValsOffset = 0;
00845     for(Tsize_t j=0; j<numBlkCols; ++j) {
00846       GlobalOrdinal blkCol = avOrds[avOrdsOffset+j];
00847       LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j];
00848       LocalOrdinal LDA = numPtRows;
00849       if (CM == Tpetra::ADD) {
00850         sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00851                          avVals.view(avValsOffset, numPtRows*numPtCols));
00852       }
00853       else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) {
00854         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00855                            avVals.view(avValsOffset, numPtRows*numPtCols));
00856       }
00857       else {
00858 //    std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00859         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00860                            avVals.view(avValsOffset, numPtRows*numPtCols));
00861       }
00862       avValsOffset += numPtRows*numPtCols;
00863     }
00864   }
00865 }
00866 
00867 //-------------------------------------------------------------------
00868 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00869 void
00870 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryView(
00871       LocalOrdinal localBlockRow,
00872       LocalOrdinal localBlockCol,
00873       LocalOrdinal& numPtRows,
00874       LocalOrdinal& numPtCols,
00875       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00876 {
00877   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00878   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00879   typedef typename Host_View_LO::iterator ITER;
00880   //This method returns a constant view of a block-entry (as an ArrayRCP).
00881 
00882   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00883    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called.");
00884 
00885   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00886    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized.");
00887 
00888   Teuchos::RCP<Node> node = getNode();
00889 
00890   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00891   const size_t bindx_offset = bpr[localBlockRow];
00892   const size_t length       = bpr[localBlockRow+1] - bindx_offset;
00893 
00894   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00895   ITER bindx_beg = bindx.begin()+bindx_offset, 
00896        bindx_end = bindx_beg + length;
00897   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00898 
00899   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00900     "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found.");
00901 
00902   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00903   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00904 
00905   const LocalOrdinal blkSize = numPtRows*numPtCols;
00906   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg));
00907   const LocalOrdinal offset = indx[0];
00908   blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset);
00909 }
00910 
00911 //-------------------------------------------------------------------
00912 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00913 Teuchos::RCP<Node>
00914 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getNode() const
00915 {
00916   return getBlockRowMap()->getPointMap()->getNode();
00917 }
00918 
00919 //-------------------------------------------------------------------
00920 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00921 void
00922 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::putScalar(Scalar s)
00923 {
00924   Teuchos::RCP<Node> node = getNode();
00925 
00926   if (isFillComplete()) {
00927     fill_device_ArrayRCP(node, pbuf_values1D_, s);
00928   }
00929   else {
00930     typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t;
00931     Teuchos::Array<RowGlobalCols>& rows = *data_2D_;
00932     Tsize_t numBlockRows = rows.size();
00933     for(Tsize_t r=0; r<numBlockRows; ++r) {
00934       typename RowGlobalCols::iterator
00935         col_iter = rows[r].begin(),
00936         col_end  = rows[r].end();
00937       for(; col_iter != col_end; ++col_iter) {
00938         Teuchos::ArrayRCP<Scalar>& vals = col_iter->second;
00939         std::fill(vals.begin(), vals.end(), s);
00940       }
00941     }
00942   }
00943 }
00944 
00945 //-------------------------------------------------------------------
00946 template<class ArrayType1,class ArrayType2,class Ordinal>
00947 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode)
00948 {
00949   size_t offset = 0;
00950   size_t src_offset = 0;
00951 
00952   if (mode == ADD) {
00953     for(Ordinal col=0; col<cols; ++col) {
00954       for(Ordinal row=0; row<rows; ++row) {
00955         dest[offset++] += src[src_offset + row];
00956       }
00957       src_offset += stride;
00958     }
00959   }
00960   else {
00961     for(Ordinal col=0; col<cols; ++col) {
00962       for(Ordinal row=0; row<rows; ++row) {
00963         dest[offset++] = src[src_offset + row];
00964       }
00965       src_offset += stride;
00966     }
00967   }
00968 }
00969 
00970 //-------------------------------------------------------------------
00971 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00972 void
00973 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
00974 {
00975   //first get an ArrayRCP for the internal storage for this block-entry:
00976   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
00977   LocalOrdinal blkRowSize = blockEntry.numRows();
00978   LocalOrdinal blkColSize = blockEntry.numCols();
00979   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
00980 
00981   //now copy the incoming block-entry into internal storage:
00982   const Scalar* inputvalues = blockEntry.values();
00983   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
00984 }
00985 
00986 //-------------------------------------------------------------------
00987 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00988 void
00989 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
00990 {
00991   //first get an ArrayRCP for the internal storage for this block-entry:
00992   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
00993   LocalOrdinal blkRowSize = blockEntry.numRows();
00994   LocalOrdinal blkColSize = blockEntry.numCols();
00995   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
00996 
00997   //now copy the incoming block-entry into internal storage:
00998   const Scalar* inputvalues = blockEntry.values();
00999   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01000 }
01001 
01002 //-------------------------------------------------------------------
01003 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01004 void
01005 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01006 {
01007   //first get an ArrayRCP for the internal storage for this block-entry:
01008   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01009   LocalOrdinal blkRowSize = blockEntry.numRows();
01010   LocalOrdinal blkColSize = blockEntry.numCols();
01011   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01012 
01013   //now sum (add) the incoming block-entry into internal storage:
01014   const Scalar* inputvalues = blockEntry.values();
01015   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01016 }
01017 
01018 //-------------------------------------------------------------------
01019 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01020 void
01021 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01022 {
01023   //first get an ArrayRCP for the internal storage for this block-entry:
01024   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01025   LocalOrdinal blkRowSize = blockEntry.numRows();
01026   LocalOrdinal blkColSize = blockEntry.numCols();
01027   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01028 
01029   //now sum (add) the incoming block-entry into internal storage:
01030   const Scalar* inputvalues = blockEntry.values();
01031   set_array_values(internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01032 }
01033 
01034 //-------------------------------------------------------------------
01035 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01036 void
01037 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01038 {
01039   //first get an ArrayRCP for the internal storage for this block-entry:
01040   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01041   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01042 
01043   LocalOrdinal blk_size = blockEntry.size();
01044   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01045     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01046 
01047   //copy the incoming block-entry into internal storage:
01048   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01049 }
01050 
01051 //-------------------------------------------------------------------
01052 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01053 void
01054 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01055 {
01056   //first get an ArrayRCP for the internal storage for this block-entry:
01057   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01058   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01059 
01060   LocalOrdinal blk_size = blockEntry.size();
01061   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01062     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01063 
01064   //copy the incoming block-entry into internal storage:
01065   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01066 }
01067 
01068 //-------------------------------------------------------------------
01069 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01070 void
01071 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01072 {
01073   //first get an ArrayRCP for the internal storage for this block-entry:
01074   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01075   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01076 
01077   LocalOrdinal blk_size = blockEntry.size();
01078   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01079     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01080 
01081   //copy the incoming block-entry into internal storage:
01082   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01083 }
01084 
01085 //-------------------------------------------------------------------
01086 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01087 void
01088 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01089 {
01090   //first get an ArrayRCP for the internal storage for this block-entry:
01091   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01092   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01093 
01094   LocalOrdinal blk_size = blockEntry.size();
01095   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01096     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01097 
01098   //copy the incoming block-entry into internal storage:
01099   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01100 }
01101 
01102 //-------------------------------------------------------------------
01103 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01104 void
01105 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::optimizeStorage()
01106 {
01107   typedef Teuchos::ArrayRCP<const size_t> Host_View;
01108   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
01109   typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t;
01110 
01111   if (is_storage_optimized_ == true) return;
01112 
01113   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error,
01114     "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called.");
01115 
01116   size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries();
01117 
01118   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap();
01119   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap();
01120 
01121   const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode();
01122 
01123   //need to count the number of point-entries:
01124   size_t num_point_entries = 0;
01125   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
01126 
01127   if (bptr.size() == 0) {
01128     is_storage_optimized_ = true;
01129     return;
01130   }
01131 
01132   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
01133 
01134   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01135     size_t rbeg = bptr[r];
01136     size_t rend = bptr[r+1];
01137 
01138     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01139 
01140     for(size_t j=rbeg; j<rend; ++j) {
01141       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]);
01142       num_point_entries += rsize*csize;
01143     }
01144   }
01145 
01146   pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1);
01147   pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries);
01148 
01149   Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(Kokkos::WriteOnly, num_block_nonzeros+1, pbuf_indx_);
01150   Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(Kokkos::WriteOnly, num_point_entries, pbuf_values1D_);
01151 
01152   bool have_2D_data = data_2D_->size() > 0;
01153   Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
01154 
01155   size_t ioffset = 0;
01156   size_t offset = 0;
01157   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01158     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01159 
01160     RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL;
01161 
01162     for(size_t c=bptr[r]; c<bptr[r+1]; ++c) {
01163       view_indx[ioffset++] = offset;
01164 
01165       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]);
01166       Tsize_t blkSize = rsize*csize;
01167 
01168       if (blk_row != NULL) {
01169         GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]);
01170         typename RowGlobalCols::iterator iter = blk_row->find(global_col);
01171         TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error,
01172           "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row.");
01173   
01174         Teuchos::ArrayRCP<Scalar> vals = iter->second;
01175         for(Tsize_t n=0; n<blkSize; ++n) {
01176           view_values1D[offset+n] = vals[n];
01177         }
01178       }
01179       else {
01180         for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero;
01181       }
01182       offset += blkSize;
01183     }
01184   }
01185   view_indx[ioffset] = offset;
01186 
01187   //Final step: release memory for the "2D" storage:
01188   data_2D_ = Teuchos::null;
01189 
01190   is_storage_optimized_ = true;
01191 }
01192 
01193 //-------------------------------------------------------------------
01194 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01195 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatrix()
01196 {
01197   //We insist that optimzeStorage has already been called.
01198   //We don't care whether this function (fillLocalMatrix()) is being
01199   //called for the first time or not.
01200   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01201     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01202 
01203   Teuchos::ArrayRCP<const size_t      > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets();
01204   Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices();
01205   if (Node::isHostNode == false) {
01206     RCP<Node> node = getRangeMap()->getNode();
01207     //
01208     Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size());
01209     node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets);
01210     nodeRowOffsets = dev_nodeRowOffsets;
01211     //
01212     Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size());
01213     node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds);
01214     nodePackedInds = dev_nodePackedInds;
01215   }
01216   lclMatrix_.setPackedValues(pbuf_values1D_,
01217                              getBlockRowMap()->getNodeFirstPointInBlocks_Device(),
01218                              getBlockColMap()->getNodeFirstPointInBlocks_Device(),
01219                              nodeRowOffsets,
01220                              nodePackedInds,
01221                              pbuf_indx_);
01222 }
01223 
01224 //-------------------------------------------------------------------
01225 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01226 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatVec()
01227 {
01228   //We insist that optimzeStorage has already been called.
01229   //We don't care whether this function (fillLocalMatVec()) is being
01230   //called for the first time or not.
01231   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01232     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01233 
01234   lclMatOps_.initializeValues(lclMatrix_);
01235 }
01236 
01237 //-------------------------------------------------------------------
01238 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01239 void
01240 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::createImporterExporter()
01241 {
01242   typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap;
01243 
01244   const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap();
01245   const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap();
01246   const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap();
01247   const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap();
01248 
01249   if (!ptDomMap->isSameAs(*ptColMap)) {
01250     importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap));
01251   }
01252   if (!ptRngMap->isSameAs(*ptRowMap)) {
01253     exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap));
01254   }
01255 }
01256 
01257 //-------------------------------------------------------------------
01258 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01259 void
01260 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap)
01261 {
01262   if (isFillComplete()) return;
01263 
01264   globalAssemble();
01265 
01266   blkGraph_->fillComplete(blockDomainMap, blockRangeMap);
01267 
01268   createImporterExporter();
01269 
01270   is_fill_completed_ = true;
01271 
01272   optimizeStorage();
01273 
01274   fillLocalMatrix();
01275   fillLocalMatVec();
01276 }
01277 
01278 //-------------------------------------------------------------------
01279 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01280 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete()
01281 {
01282   //In this case, our block-row-map will also be our domain-map and
01283   //range-map.
01284 
01285   fillComplete(getBlockRowMap(), getBlockRowMap());
01286 }
01287 
01288 //-------------------------------------------------------------------
01289 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01290 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::globalAssemble()
01291 {
01292   if (getPointRowMap()->getComm()->getSize() == 1) {
01293     return;
01294   }
01295 
01296   //nonlocal_data_ contains data that belongs on other processors.
01297   //We'll refer to this as overlapping data, and create an 'overlapMap'
01298   //that describes the layout of this overlapping data.
01299   Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
01300      overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap());
01301 
01302   if (overlapMap->getGlobalNumBlocks() == 0) {
01303     return;
01304   }
01305 
01306   //VbrDataDist is a wrapper that makes our overlapping data behave
01307   //like a DistObject so that the overlapping data can be exported
01308   //to the owning processors in a standard way.
01309   VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node>
01310       vdd(nonlocal_data_, *overlapMap, *this->getBlockRowMap());
01311 
01312   //Create an exporter where the source map is our overlapMap and the
01313   //target map is the rowmap of our VbrMatrix.
01314   Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>
01315       importer( vdd.getMap(), convertBlockMapToPointMap(*getBlockRowMap()));
01316 
01317   //Communicate the overlapping data to the owning processors and add it
01318   //into this VbrMatrix.
01319   this->doImport(vdd, importer, Tpetra::ADD);
01320 
01321   //Zero out the overlapping data so it can be re-populated and re-assembled
01322   //in future calls to globalAssemble.
01323   VbrUtils::zeroEntries(nonlocal_data_);
01324 }
01325 
01326 //-------------------------------------------------------------------
01327 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01328 std::string
01329 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::description() const
01330 {
01331   std::ostringstream oss;
01332   oss << Teuchos::Describable::description();
01333   if (isFillComplete()) {
01334     oss << "{status = fill complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}";
01335   }
01336   else {
01337     oss << "{status = fill not complete, global num block rows = " << getBlockRowMap()->getGlobalNumBlocks() << "}";
01338   }
01339   return oss.str();
01340 }
01341 
01342 //-------------------------------------------------------------------
01343 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01344 void
01345 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
01346 {
01347   Teuchos::EVerbosityLevel vl = verbLevel;
01348   if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW;
01349 
01350   if (vl == Teuchos::VERB_NONE) return;
01351 
01352   Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm();
01353   const int myProc = comm->getRank();
01354 
01355   if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl;
01356 }
01357 
01358 }//namespace Tpetra
01359 
01360 //
01361 // Explicit instantiation macro
01362 //
01363 // Must be expanded from within the Tpetra namespace!
01364 //
01365 
01366 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01367   \
01368   template class VbrMatrix< SCALAR , LO , GO , NODE >;
01369 
01370 #endif //TPETRA_VBRMATRIX_DEF_HPP
01371 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines