Tpetra Matrix/Vector Services Version of the Day
Tpetra_VbrMatrix_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP
00043 #define TPETRA_VBRMATRIX_DEF_HPP
00044 
00045 #include <Tpetra_BlockMap.hpp>
00046 #include <Tpetra_BlockCrsGraph.hpp>
00047 #include <Tpetra_DistObject.hpp>
00048 #include <Tpetra_Vector.hpp>
00049 #include <Kokkos_NodeHelpers.hpp>
00050 #include <Kokkos_VbrMatrix.hpp>
00051 #include <Teuchos_SerialDenseMatrix.hpp>
00052 #include <algorithm>
00053 #include <sstream>
00054 
00055 #ifdef DOXYGEN_USE_ONLY
00056 #include "Tpetra_VbrMatrix_decl.hpp"
00057 #endif
00058 
00059 namespace Tpetra {
00060 
00061 template<class Scalar,class Node>
00062 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value)
00063 {
00064   KokkosClassic::ReadyBufferHelper<Node> rbh(node);
00065   KokkosClassic::InitOp<Scalar> wdp;
00066   wdp.alpha = value;
00067   rbh.begin();
00068   wdp.x = rbh.addNonConstBuffer(ptr);
00069   rbh.end();
00070   node->template parallel_for<KokkosClassic::InitOp<Scalar> >(0, ptr.size(), wdp);
00071 }
00072 
00073 //-------------------------------------------------------------------
00074 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00075 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype)
00076  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)),
00077    blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))),
00078    constBlkGraph_(blkGraph_),
00079    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkRowMap->getNodeNumBlocks (), 
00080                 blkRowMap->getPointMap()->getNode ()))),
00081    pbuf_values1D_(),
00082    pbuf_indx_(),
00083    lclMatOps_(blkRowMap->getPointMap()->getNode()),
00084    importer_(),
00085    exporter_(),
00086    importedVec_(),
00087    exportedVec_(),
00088    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00089    nonlocal_data_(),
00090    is_fill_completed_(false),
00091    is_storage_optimized_(false)
00092 {
00093   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00094   //each entry in the graph corresponds to a block-entry in the matrix.
00095   //That is, you can think of a VBR matrix as a Crs matrix of dense
00096   //submatrices...
00097 }
00098 
00099 //-------------------------------------------------------------------
00100 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00101 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph)
00102  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())),
00103    blkGraph_(),
00104    constBlkGraph_(blkGraph),
00105    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkGraph->getBlockRowMap ()->getNodeNumBlocks (),
00106                 blkGraph->getBlockRowMap ()->getPointMap ()->getNode ()))),
00107    pbuf_values1D_(),
00108    pbuf_indx_(),
00109    lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00110    importer_(),
00111    exporter_(),
00112    importedVec_(),
00113    exportedVec_(),
00114    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00115    nonlocal_data_(),
00116    is_fill_completed_(false),
00117    is_storage_optimized_(false)
00118 {
00119   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00120   //each entry in the graph corresponds to a block-entry in the matrix.
00121   //That is, you can think of a VBR matrix as a Crs matrix of dense
00122   //submatrices...
00123 
00124   TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error,
00125    "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true.");
00126 
00127   createImporterExporter();
00128 
00129   is_fill_completed_ = true;
00130 
00131   optimizeStorage();
00132 
00133   fillLocalMatrix();
00134   fillLocalMatVec();
00135 }
00136 
00137 //-------------------------------------------------------------------
00138 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00139 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~VbrMatrix()
00140 {
00141 }
00142 
00143 //-------------------------------------------------------------------
00144 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00145 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00146 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const
00147 {
00148   return getBlockDomainMap()->getPointMap();
00149 }
00150 
00151 //-------------------------------------------------------------------
00152 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00153 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00154 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const
00155 {
00156   return getBlockRangeMap()->getPointMap();
00157 }
00158 
00159 //-------------------------------------------------------------------
00160 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00161 bool
00162 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::isFillComplete() const
00163 {
00164   return is_fill_completed_;
00165 }
00166 
00167 //-------------------------------------------------------------------
00168 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00169 template <class DomainScalar, class RangeScalar>
00170 void
00171 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00172 multiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 
00173     MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 
00174     Teuchos::ETransp trans, 
00175     RangeScalar alpha, 
00176     RangeScalar beta) const
00177 {
00178   TEUCHOS_TEST_FOR_EXCEPTION(
00179     ! isFillComplete (), std::runtime_error,
00180     "Tpetra::VbrMatrix::multiply: This method may only be called after "
00181     "fillComplete has been called.");
00182 
00183   const KokkosClassic::MultiVector<Scalar,Node> *lclX = &X.getLocalMV();
00184   KokkosClassic::MultiVector<Scalar,Node>       *lclY = &Y.getLocalMVNonConst();
00185 
00186   lclMatOps_.template multiply<Scalar,Scalar> (trans, alpha, *lclX, beta, *lclY);
00187 }
00188 
00189 //-------------------------------------------------------------------
00190 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00191 template<class DomainScalar, class RangeScalar>
00192 void
00193 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00194 solve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y, 
00195        MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X, 
00196        Teuchos::ETransp trans) const
00197 {
00198   TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error,
00199         "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride.");
00200 
00201   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00202     "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called.");
00203 
00204   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error,
00205     "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular.");
00206 
00207   const KokkosClassic::MultiVector<RangeScalar,Node> *lclY = &Y.getLocalMV();
00208   KokkosClassic::MultiVector<DomainScalar,Node>      *lclX = &X.getLocalMVNonConst();
00209 
00210   Teuchos::EUplo triang = constBlkGraph_->isUpperTriangular() ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI;
00211   Teuchos::EDiag diag = (constBlkGraph_->getNodeNumBlockDiags() < constBlkGraph_->getNodeNumBlockRows()) ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG;
00212 
00213   lclMatOps_.template solve<DomainScalar,RangeScalar>(trans,triang,diag, *lclY,*lclX);
00214 }
00215 
00216 //-------------------------------------------------------------------
00217 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00218 void
00219 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const
00220 {
00221   if (importer_ != Teuchos::null) {
00222     if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) {
00223       importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors()));
00224     }
00225 
00226     importedVec_->doImport(X, *importer_, REPLACE);
00227   }
00228 }
00229 
00230 //-------------------------------------------------------------------
00231 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00232 void
00233 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
00234 {
00235   if (exporter_ != Teuchos::null) {
00236     if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) {
00237       exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors()));
00238     }
00239 
00240     exportedVec_->doExport(Y, *exporter_, REPLACE);
00241   }
00242 }
00243 
00244 //-------------------------------------------------------------------
00245 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00246 void
00247 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
00248          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00249                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00250                Teuchos::ETransp trans,
00251                Scalar alpha,
00252                Scalar beta) const
00253 {
00254   if (trans == Teuchos::NO_TRANS) {
00255     updateImport(X);
00256     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00257     this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta);
00258   }
00259   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00260     updateImport(Y);
00261     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00262     this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta);
00263     if (importedVec_ != Teuchos::null) {
00264       Y.doExport(*importedVec_, *importer_, ADD);
00265     }
00266   }
00267 }
00268 
00269 //-------------------------------------------------------------------
00270 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00271 void
00272 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyInverse(
00273          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00274                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00275                Teuchos::ETransp trans) const
00276 {
00277   if (trans == Teuchos::NO_TRANS) {
00278     updateImport(Y);
00279     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00280     this->template solve<Scalar,Scalar>(Y, Xref, trans);
00281   }
00282   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00283     updateImport(Y);
00284     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00285     this->template solve<Scalar,Scalar>(Yref, X, trans);
00286     if (importedVec_ != Teuchos::null) {
00287       X.doExport(*importedVec_, *importer_, ADD);
00288     }
00289   }
00290 }
00291 
00292 //-------------------------------------------------------------------
00293 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00294 bool
00295 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const
00296 {
00297   return true;
00298 }
00299 
00300 //-------------------------------------------------------------------
00301 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00302 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00303 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRowMap() const
00304 {
00305   return constBlkGraph_->getBlockRowMap();
00306 }
00307 
00308 //-------------------------------------------------------------------
00309 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00310 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00311 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointRowMap() const
00312 {
00313   return constBlkGraph_->getBlockRowMap()->getPointMap();
00314 }
00315 
00316 //-------------------------------------------------------------------
00317 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00318 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00319 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockColMap() const
00320 {
00321   return constBlkGraph_->getBlockColMap();
00322 }
00323 
00324 //-------------------------------------------------------------------
00325 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00326 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00327 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockDomainMap() const
00328 {
00329   return constBlkGraph_->getBlockDomainMap();
00330 }
00331 
00332 //-------------------------------------------------------------------
00333 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00334 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00335 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRangeMap() const
00336 {
00337   return constBlkGraph_->getBlockRangeMap();
00338 }
00339 
00340 //-------------------------------------------------------------------
00341 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00342 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00343 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointColMap() const
00344 {
00345   return constBlkGraph_->getBlockColMap()->getPointMap();
00346 }
00347 
00348 //-------------------------------------------------------------------
00349 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00350 void
00351 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00352 getGlobalBlockRowView (GlobalOrdinal globalBlockRow,
00353            LocalOrdinal& numPtRows,
00354            Teuchos::ArrayView<const GlobalOrdinal>& blockCols,
00355            Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00356            Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const
00357 {
00358   TEUCHOS_TEST_FOR_EXCEPTION(
00359     isFillComplete(), std::runtime_error,
00360     "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, "
00361     "isFillComplete() is required to be false.");
00362 
00363   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00364 
00365   LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00366   numPtRows = getBlockRowMap()->getLocalBlockSize(localRow);
00367   constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols);
00368   ptColsPerBlockCol.resize(blockCols.size());
00369   blockEntries.resize(blockCols.size());
00370   for (Tsize_t i = 0; i < blockCols.size (); ++i) {
00371     getGlobalBlockEntryView (globalBlockRow, blockCols[i],
00372            numPtRows, ptColsPerBlockCol[i], blockEntries[i]);
00373   }
00374 }
00375 
00376 //-------------------------------------------------------------------
00377 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00378 void
00379 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockRowView(
00380          LocalOrdinal localBlockRow,
00381          LocalOrdinal& numPtRows,
00382          Teuchos::ArrayView<const LocalOrdinal>& blockCols,
00383          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00384          Teuchos::ArrayRCP<const Scalar>& blockEntries) const
00385 {
00386   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00387       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true.");
00388 
00389   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00390   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00391   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00392 
00393   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00394   constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols);
00395   ptColsPerBlockCol.resize(blockCols.size());
00396   LocalOrdinal num_scalars_in_block_row = 0;
00397   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00398     ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]);
00399     num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i];
00400   }
00401 
00402   Teuchos::RCP<Node> node = getNode();
00403   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00404   const size_t bindx_offset = bpr[localBlockRow];
00405   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset);
00406   const LocalOrdinal offset = indx[0];
00407   blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset);
00408 }
00409 
00410 //-------------------------------------------------------------------
00411 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00412 void
00413 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryViewNonConst(
00414     GlobalOrdinal globalBlockRow,
00415     GlobalOrdinal globalBlockCol,
00416     LocalOrdinal& numPtRows,
00417     LocalOrdinal& numPtCols,
00418     Teuchos::ArrayRCP<Scalar>& blockEntry)
00419 {
00420   //Return a non-const, read-write view of a block-entry (as an ArrayRCP),
00421   //creating/allocating the block-entry if it doesn't already exist, (but only
00422   //if fillComplete hasn't been called yet, and if the arguments numPtRows
00423   //and numPtCols have been set on input).
00424 
00425   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00426 
00427   if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
00428     VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_,
00429                    globalBlockRow, globalBlockCol,
00430                    numPtRows, numPtCols, blockEntry);
00431     return;
00432   }
00433 
00434   if (is_storage_optimized_) {
00435     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00436       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false.");
00437 
00438     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00439 
00440     getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol,
00441                                    numPtRows, numPtCols, blockEntry);
00442     return;
00443   }
00444 
00445   //If we get to here, fillComplete hasn't been called yet, and the matrix data
00446   //is stored in un-packed '2D' form.
00447 
00448   if (data_2D_->size() == 0) {
00449     LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows();
00450     data_2D_->resize(numBlockRows);
00451   }
00452 
00453   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00454 
00455   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00456 
00457   if (col_iter != blkrow.end()) {
00458     blockEntry = col_iter->second;
00459   }
00460   else {
00461     //blockEntry doesn't already exist, so we will create it.
00462 
00463     //make sure block-size is specified:
00464     TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error,
00465       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0.");
00466 
00467     Teuchos::RCP<Node> node = getNode();
00468     size_t blockSize = numPtRows*numPtCols;
00469     blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize);
00470     std::fill(blockEntry.begin(), blockEntry.end(), 0);
00471     blkrow.insert(std::make_pair(globalBlockCol, blockEntry));
00472     blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1));
00473   }
00474 }
00475 
00476 //-------------------------------------------------------------------
00477 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00478 void
00479 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryView(
00480       GlobalOrdinal globalBlockRow,
00481       GlobalOrdinal globalBlockCol,
00482       LocalOrdinal& numPtRows,
00483       LocalOrdinal& numPtCols,
00484       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00485 {
00486   //This method returns a const-view of a block-entry (as an ArrayRCP).
00487   //Throws an exception if the block-entry doesn't already exist.
00488 
00489   if (is_storage_optimized_) {
00490     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00491       "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false.");
00492 
00493     LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00494     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00495     getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry);
00496     return;
00497   }
00498 
00499   TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error,
00500     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view.");
00501 
00502   //this acts as a range-check for globalBlockRow:
00503   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00504   TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
00505      std::runtime_error,
00506      "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor.");
00507 
00508   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00509   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00510 
00511   if (col_iter == blkrow.end()) {
00512     throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist.");
00513   }
00514 
00515   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00516 
00517   TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error,
00518     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0.");
00519 
00520   blockEntry = col_iter->second;
00521   numPtCols = blockEntry.size() / numPtRows;
00522 }
00523 
00524 //-------------------------------------------------------------------
00525 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00526 void
00527 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryViewNonConst(
00528     LocalOrdinal localBlockRow,
00529     LocalOrdinal localBlockCol,
00530     LocalOrdinal& numPtRows,                     
00531     LocalOrdinal& numPtCols,
00532     Teuchos::ArrayRCP<Scalar>& blockEntry)
00533 {
00534   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00535   typedef Teuchos::ArrayRCP<const size_t> Host_View;
00536   typedef typename Host_View_LO::iterator ITER;
00537   //This method returns a non-constant view of a block-entry (as an ArrayRCP).
00538 
00539   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00540    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called.");
00541 
00542   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00543    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized.");
00544 
00545   Teuchos::RCP<Node> node = getNode();
00546 
00547   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
00548   LocalOrdinal bindx_offset = bptr[localBlockRow];
00549   LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset;
00550 
00551   TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error,
00552     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow.");
00553 
00554   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00555   ITER bindx_beg = bindx.begin() + bindx_offset,  
00556        bindx_end = bindx_beg + length;
00557   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00558 
00559   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00560     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found.");
00561 
00562   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00563   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00564 
00565   const LocalOrdinal blkSize = numPtRows*numPtCols;
00566   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg));
00567   const LocalOrdinal offset = indx[0];
00568   blockEntry = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite, blkSize, pbuf_values1D_ + offset);
00569 }
00570 
00571 //-------------------------------------------------------------------
00572 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00573 void
00574 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalDiagCopy(
00575   Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const
00576 {
00577   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap();
00578   TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true,
00579     std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map.");
00580 
00581   Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst();
00582   Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs();
00583   size_t offset = 0;
00584   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00585   for(Tsize_t i=0; i<blockIDs.size(); ++i) {
00586     LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]);
00587     LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID);
00588     Teuchos::ArrayRCP<const Scalar> blockEntry;
00589     getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry);
00590 
00591     for(LocalOrdinal j=0; j<blockSize; ++j) {
00592       diag_view[offset++] = blockEntry[j*blockSize+j];
00593     }
00594   }
00595 }
00596 
00597 //-------------------------------------------------------------------
00598 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00599 bool
00600 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00601 checkSizes (const SrcDistObject& source)
00602 {
00603   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00604   const this_type* srcVbrMat = dynamic_cast<const this_type*> (&source);
00605 
00606   if (srcVbrMat == NULL) {
00607     typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD;
00608     const VDD* vdd = const_cast<VDD*> (dynamic_cast<const VDD*> (&source));
00609     if (vdd == NULL) {
00610       return false;
00611     } else {
00612       return (this->getMap ()->getMinAllGlobalIndex () <= 
00613         vdd->getMap ()->getMinAllGlobalIndex ()) &&
00614   (this->getMap ()->getMaxAllGlobalIndex () >= 
00615    vdd->getMap ()->getMaxAllGlobalIndex ());
00616     }
00617   } else {
00618     return (this->getMap ()->getMinAllGlobalIndex () <= 
00619       srcVbrMat->getMap ()->getMinAllGlobalIndex ()) &&
00620       (this->getMap ()->getMaxAllGlobalIndex () >= 
00621        srcVbrMat->getMap ()->getMaxAllGlobalIndex ());
00622   }
00623 }
00624 
00625 //-------------------------------------------------------------------
00626 template<class Scalar, 
00627    class LocalOrdinal, 
00628    class GlobalOrdinal, 
00629    class Node, 
00630    class LocalMatOps>
00631 void
00632 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00633 copyAndPermute (const SrcDistObject& source,
00634     size_t numSameIDs,
00635     const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
00636     const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
00637 {
00638   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00639   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00640   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00641 
00642   if (src_mat_ptr == NULL) {
00643     throw std::runtime_error ("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed.");
00644   }
00645   const this_type& src_mat = *src_mat_ptr;
00646 
00647   Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00648   LocalOrdinal numPtRows;
00649 
00650   LocalOrdinal numSame = numSameIDs;
00651   for(LocalOrdinal i=0; i<numSame; ++i) {
00652     if (isFillComplete()) {
00653       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00654       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00655 
00656       src_mat.getLocalBlockRowView (i, numPtRows, src_blkCols, 
00657             src_ptColsPerBlkCol, src_blkEntries);
00658       unsigned offset = 0;
00659       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00660         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00661         setLocalBlockEntry (i, src_blkCols[j], numPtRows, numPtCols, numPtCols,
00662           src_blkEntries.view (offset, numPtRows*numPtCols));
00663         offset += numPtRows*numPtCols;
00664       }
00665     }
00666     else {
00667       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (i);
00668       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00669       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00670 
00671       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 
00672              src_ptColsPerBlkCol, src_blkEntries);
00673       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00674         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00675         setGlobalBlockEntry (gRow, src_blkCols[j], 
00676            numPtRows, numPtCols, numPtRows, 
00677            src_blkEntries[j].view (0, numPtRows * numPtCols));
00678       }
00679     }
00680   }
00681 
00682   for (Tsize_t i = 0; i < permuteToLIDs.size (); ++i) {
00683     if (isFillComplete ()) {
00684       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00685       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00686 
00687       src_mat.getLocalBlockRowView (permuteFromLIDs[i], numPtRows, 
00688             src_blkCols, src_ptColsPerBlkCol, 
00689             src_blkEntries);
00690       unsigned offset = 0;
00691       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00692         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00693         setLocalBlockEntry (permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols,
00694           src_blkEntries.view(offset, numPtRows*numPtCols));
00695         offset += numPtRows*numPtCols;
00696       }
00697     }
00698     else {
00699       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (permuteToLIDs[i]);
00700       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00701       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00702 
00703       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 
00704              src_ptColsPerBlkCol, src_blkEntries);
00705       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00706         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00707         setGlobalBlockEntry (gRow, src_blkCols[j], 
00708            numPtRows, numPtCols, numPtRows, 
00709            src_blkEntries[j].view (0, numPtRows * numPtCols));
00710       }
00711     }
00712   }
00713 }
00714 
00715 //-------------------------------------------------------------------
00716 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00717 void
00718 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00719 packAndPrepare (const SrcDistObject& source,
00720     const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00721     Teuchos::Array<char>& exports,
00722     const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00723     size_t& constantNumPackets,
00724     Distributor& distor)
00725 {
00726   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00727   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00728   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00729 
00730   if (src_mat_ptr == NULL) {
00731     typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
00732     const VDD* vdd = dynamic_cast<const VDD*> (&source);
00733     TEUCHOS_TEST_FOR_EXCEPTION(
00734       vdd == NULL, std::invalid_argument, "Tpetra::VbrMatrix::packAndPrepare: "
00735       "Source object input must be either a VbrMatrix or a VbrDataDist with "
00736       "matching template parameters.");
00737     vdd->pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
00738     return;
00739   }
00740   const this_type& src_mat = *src_mat_ptr;
00741 
00742   //We will pack each row's data into the exports buffer as follows:
00743   //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}]
00744   //so the length of the char exports buffer for a row is:
00745   //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i)
00746 
00747   //For each row corresponding to exportLIDs, accumulate the size that it will
00748   //occupy in the exports buffer:
00749   size_t total_exports_size = 0;
00750   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00751     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00752     LocalOrdinal numPtRows;
00753     LocalOrdinal numBlkCols = 0;
00754     LocalOrdinal numScalars = 0;
00755 
00756     if (src_mat.isFillComplete ()) {
00757       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00758       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00759       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows,
00760             src_blkCols, src_ptColsPerBlkCol, 
00761             src_blkEntries);
00762       numBlkCols = src_blkCols.size ();
00763       numScalars = src_blkEntries.size ();
00764     }
00765     else {
00766       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00767       GlobalOrdinal gRow = 
00768   src_mat.getBlockRowMap ()->getGlobalBlockID (exportLIDs[i]);
00769       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00770       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 
00771              src_ptColsPerBlkCol, src_blkEntries);
00772       numBlkCols = src_blkCols.size ();
00773       for (Tsize_t j = 0; j < src_blkEntries.size (); ++j) {
00774         numScalars += src_blkEntries[j].size ();
00775       }
00776     }
00777 
00778     const size_t size_for_this_row = 
00779       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols) + 
00780       sizeof (Scalar) * numScalars;
00781     numPacketsPerLID[i] = size_for_this_row;
00782     total_exports_size += size_for_this_row;
00783   }
00784 
00785   exports.resize (total_exports_size);
00786 
00787   ArrayView<char> avIndsC, avValsC;
00788   ArrayView<Scalar>        avVals;
00789 
00790   size_t offset = 0;
00791 
00792   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00793     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00794     LocalOrdinal numPtRows;
00795     LocalOrdinal numBlkCols = 0;
00796     LocalOrdinal numScalars = 0;
00797 
00798     if (src_mat.isFillComplete ()) {
00799       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00800       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00801       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows, src_blkCols, 
00802             src_ptColsPerBlkCol, src_blkEntries);
00803       numBlkCols = src_blkCols.size ();
00804       numScalars = src_blkEntries.size ();
00805 
00806       LocalOrdinal num_chars_for_ordinals = 
00807   (2*numBlkCols + 2) * sizeof (LocalOrdinal);
00808       //get export views
00809       avIndsC = exports (offset, num_chars_for_ordinals);
00810       avValsC = exports (offset + num_chars_for_ordinals, numScalars * sizeof (Scalar));
00811       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00812       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00813 
00814       const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map = 
00815   src_mat.getBlockColMap ();
00816 
00817       //put row info into the buffer views:
00818       *ind_it++ = numBlkCols;
00819       *ind_it++ = numPtRows;
00820       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00821         *ind_it++ = col_map->getGlobalBlockID (src_blkCols[j]);
00822       }
00823       std::copy (src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00824       avVals = av_reinterpret_cast<Scalar> (avValsC);
00825       std::copy (src_blkEntries.begin(), src_blkEntries.end(), avVals.begin());
00826     }
00827     else {
00828       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00829       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00830       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00831       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols, 
00832              src_ptColsPerBlkCol, src_blkEntries);
00833       numBlkCols = src_blkCols.size();
00834       numScalars = 0;
00835       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00836         numScalars += numPtRows*src_ptColsPerBlkCol[j];
00837       }
00838 
00839       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00840       //get export views
00841       avIndsC = exports(offset, num_chars_for_ordinals);
00842       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00843       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00844       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00845 
00846       //put row info into the buffer views:
00847       *ind_it++ = numBlkCols;
00848       *ind_it++ = numPtRows;
00849       std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it);
00850       ind_it += src_blkCols.size();
00851       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00852       avVals = av_reinterpret_cast<Scalar>(avValsC);
00853       typename ArrayView<Scalar>::iterator val_it = avVals.begin();
00854       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00855         std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it);
00856         val_it += src_blkEntries[j].size();
00857       }
00858     }
00859 
00860     const size_t size_for_this_row = 
00861       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols)
00862       + sizeof (Scalar) * numScalars;
00863     offset += size_for_this_row;
00864   }
00865 
00866   constantNumPackets = 0;
00867 }
00868 
00869 //-------------------------------------------------------------------
00870 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00871 void
00872 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00873 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
00874       const Teuchos::ArrayView<const char>& imports,
00875       const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00876       size_t constantNumPackets,
00877       Distributor& distor,
00878       CombineMode CM)
00879 {
00880   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00881  
00882   if (CM == Tpetra::ABSMAX) {
00883     std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00884   }
00885 
00886   size_t offset = 0;
00887   for(Tsize_t i=0; i<importLIDs.size(); ++i) {
00888     ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]);
00889     ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC);
00890     GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]);
00891     size_t avOrdsOffset = 0;
00892     LocalOrdinal numBlkCols = avOrds[avOrdsOffset++];
00893     LocalOrdinal numPtRows = avOrds[avOrdsOffset++];
00894     LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00895     ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals);
00896     ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC);
00897 
00898     size_t avValsOffset = 0;
00899     for(Tsize_t j=0; j<numBlkCols; ++j) {
00900       GlobalOrdinal blkCol = avOrds[avOrdsOffset+j];
00901       LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j];
00902       LocalOrdinal LDA = numPtRows;
00903       if (CM == Tpetra::ADD) {
00904         sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00905                          avVals.view(avValsOffset, numPtRows*numPtCols));
00906       }
00907       else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) {
00908         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00909                            avVals.view(avValsOffset, numPtRows*numPtCols));
00910       }
00911       else {
00912 //    std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00913         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00914                            avVals.view(avValsOffset, numPtRows*numPtCols));
00915       }
00916       avValsOffset += numPtRows*numPtCols;
00917     }
00918   }
00919 }
00920 
00921 //-------------------------------------------------------------------
00922 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00923 void
00924 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryView(
00925       LocalOrdinal localBlockRow,
00926       LocalOrdinal localBlockCol,
00927       LocalOrdinal& numPtRows,
00928       LocalOrdinal& numPtCols,
00929       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00930 {
00931   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00932   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00933   typedef typename Host_View_LO::iterator ITER;
00934   //This method returns a constant view of a block-entry (as an ArrayRCP).
00935 
00936   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00937    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called.");
00938 
00939   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00940    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized.");
00941 
00942   Teuchos::RCP<Node> node = getNode();
00943 
00944   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00945   const size_t bindx_offset = bpr[localBlockRow];
00946   const size_t length       = bpr[localBlockRow+1] - bindx_offset;
00947 
00948   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00949   ITER bindx_beg = bindx.begin()+bindx_offset, 
00950        bindx_end = bindx_beg + length;
00951   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00952 
00953   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00954     "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found.");
00955 
00956   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00957   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00958 
00959   const LocalOrdinal blkSize = numPtRows*numPtCols;
00960   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg));
00961   const LocalOrdinal offset = indx[0];
00962   blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset);
00963 }
00964 
00965 //-------------------------------------------------------------------
00966 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00967 Teuchos::RCP<Node>
00968 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getNode() const
00969 {
00970   return getBlockRowMap()->getPointMap()->getNode();
00971 }
00972 
00973 //-------------------------------------------------------------------
00974 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00975 void
00976 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::putScalar(Scalar s)
00977 {
00978   Teuchos::RCP<Node> node = getNode();
00979 
00980   if (isFillComplete()) {
00981     fill_device_ArrayRCP(node, pbuf_values1D_, s);
00982   }
00983   else {
00984     typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t;
00985     Teuchos::Array<RowGlobalCols>& rows = *data_2D_;
00986     Tsize_t numBlockRows = rows.size();
00987     for(Tsize_t r=0; r<numBlockRows; ++r) {
00988       typename RowGlobalCols::iterator
00989         col_iter = rows[r].begin(),
00990         col_end  = rows[r].end();
00991       for(; col_iter != col_end; ++col_iter) {
00992         Teuchos::ArrayRCP<Scalar>& vals = col_iter->second;
00993         std::fill(vals.begin(), vals.end(), s);
00994       }
00995     }
00996   }
00997 }
00998 
00999 //-------------------------------------------------------------------
01000 template<class ArrayType1,class ArrayType2,class Ordinal>
01001 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode)
01002 {
01003   size_t offset = 0;
01004   size_t src_offset = 0;
01005 
01006   if (mode == ADD) {
01007     for(Ordinal col=0; col<cols; ++col) {
01008       for(Ordinal row=0; row<rows; ++row) {
01009         dest[offset++] += src[src_offset + row];
01010       }
01011       src_offset += stride;
01012     }
01013   }
01014   else {
01015     for(Ordinal col=0; col<cols; ++col) {
01016       for(Ordinal row=0; row<rows; ++row) {
01017         dest[offset++] = src[src_offset + row];
01018       }
01019       src_offset += stride;
01020     }
01021   }
01022 }
01023 
01024 //-------------------------------------------------------------------
01025 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01026 void
01027 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01028 {
01029   //first get an ArrayRCP for the internal storage for this block-entry:
01030   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01031   LocalOrdinal blkRowSize = blockEntry.numRows();
01032   LocalOrdinal blkColSize = blockEntry.numCols();
01033   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01034 
01035   //now copy the incoming block-entry into internal storage:
01036   const Scalar* inputvalues = blockEntry.values();
01037   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal >(
01038     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01039 }
01040 
01041 //-------------------------------------------------------------------
01042 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01043 void
01044 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01045 {
01046   //first get an ArrayRCP for the internal storage for this block-entry:
01047   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01048   LocalOrdinal blkRowSize = blockEntry.numRows();
01049   LocalOrdinal blkColSize = blockEntry.numCols();
01050   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01051 
01052   //now copy the incoming block-entry into internal storage:
01053   const Scalar* inputvalues = blockEntry.values();
01054   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01055 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01056 }
01057 
01058 //-------------------------------------------------------------------
01059 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01060 void
01061 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01062 {
01063   //first get an ArrayRCP for the internal storage for this block-entry:
01064   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01065   LocalOrdinal blkRowSize = blockEntry.numRows();
01066   LocalOrdinal blkColSize = blockEntry.numCols();
01067   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01068 
01069   //now sum (add) the incoming block-entry into internal storage:
01070   const Scalar* inputvalues = blockEntry.values();
01071   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01072     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01073 }
01074 
01075 //-------------------------------------------------------------------
01076 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01077 void
01078 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01079 {
01080   //first get an ArrayRCP for the internal storage for this block-entry:
01081   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01082   LocalOrdinal blkRowSize = blockEntry.numRows();
01083   LocalOrdinal blkColSize = blockEntry.numCols();
01084   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01085 
01086   //now sum (add) the incoming block-entry into internal storage:
01087   const Scalar* inputvalues = blockEntry.values();
01088   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01089     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01090 }
01091 
01092 //-------------------------------------------------------------------
01093 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01094 void
01095 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01096 {
01097   //first get an ArrayRCP for the internal storage for this block-entry:
01098   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01099   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01100 
01101   LocalOrdinal blk_size = blockEntry.size();
01102   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01103     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01104 
01105   //copy the incoming block-entry into internal storage:
01106   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01107 }
01108 
01109 //-------------------------------------------------------------------
01110 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01111 void
01112 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01113 {
01114   //first get an ArrayRCP for the internal storage for this block-entry:
01115   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01116   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01117 
01118   LocalOrdinal blk_size = blockEntry.size();
01119   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01120     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01121 
01122   //copy the incoming block-entry into internal storage:
01123   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01124 }
01125 
01126 //-------------------------------------------------------------------
01127 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01128 void
01129 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01130 {
01131   //first get an ArrayRCP for the internal storage for this block-entry:
01132   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01133   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01134 
01135   LocalOrdinal blk_size = blockEntry.size();
01136   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01137     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01138 
01139   //copy the incoming block-entry into internal storage:
01140   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01141 }
01142 
01143 //-------------------------------------------------------------------
01144 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01145 void
01146 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01147 {
01148   //first get an ArrayRCP for the internal storage for this block-entry:
01149   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01150   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01151 
01152   LocalOrdinal blk_size = blockEntry.size();
01153   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01154     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01155 
01156   //copy the incoming block-entry into internal storage:
01157   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01158 }
01159 
01160 //-------------------------------------------------------------------
01161 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01162 void
01163 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::optimizeStorage()
01164 {
01165   typedef Teuchos::ArrayRCP<const size_t> Host_View;
01166   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
01167   typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t;
01168 
01169   if (is_storage_optimized_ == true) return;
01170 
01171   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error,
01172     "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called.");
01173 
01174   size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries();
01175 
01176   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap();
01177   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap();
01178 
01179   const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode();
01180 
01181   //need to count the number of point-entries:
01182   size_t num_point_entries = 0;
01183   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
01184 
01185   if (bptr.size() == 0) {
01186     is_storage_optimized_ = true;
01187     return;
01188   }
01189 
01190   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
01191 
01192   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01193     size_t rbeg = bptr[r];
01194     size_t rend = bptr[r+1];
01195 
01196     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01197 
01198     for(size_t j=rbeg; j<rend; ++j) {
01199       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]);
01200       num_point_entries += rsize*csize;
01201     }
01202   }
01203 
01204   pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1);
01205   pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries);
01206 
01207   Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(KokkosClassic::WriteOnly, num_block_nonzeros+1, pbuf_indx_);
01208   Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly, num_point_entries, pbuf_values1D_);
01209 
01210   bool have_2D_data = data_2D_->size() > 0;
01211   Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
01212 
01213   size_t ioffset = 0;
01214   size_t offset = 0;
01215   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01216     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01217 
01218     RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL;
01219 
01220     for(size_t c=bptr[r]; c<bptr[r+1]; ++c) {
01221       view_indx[ioffset++] = offset;
01222 
01223       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]);
01224       Tsize_t blkSize = rsize*csize;
01225 
01226       if (blk_row != NULL) {
01227         GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]);
01228         typename RowGlobalCols::iterator iter = blk_row->find(global_col);
01229         TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error,
01230           "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row.");
01231   
01232         Teuchos::ArrayRCP<Scalar> vals = iter->second;
01233         for(Tsize_t n=0; n<blkSize; ++n) {
01234           view_values1D[offset+n] = vals[n];
01235         }
01236       }
01237       else {
01238         for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero;
01239       }
01240       offset += blkSize;
01241     }
01242   }
01243   view_indx[ioffset] = offset;
01244 
01245   //Final step: release memory for the "2D" storage:
01246   data_2D_ = Teuchos::null;
01247 
01248   is_storage_optimized_ = true;
01249 }
01250 
01251 //-------------------------------------------------------------------
01252 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01253 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatrix()
01254 {
01255   //We insist that optimzeStorage has already been called.
01256   //We don't care whether this function (fillLocalMatrix()) is being
01257   //called for the first time or not.
01258   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01259     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01260 
01261   Teuchos::ArrayRCP<const size_t      > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets();
01262   Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices();
01263   if (Node::isHostNode == false) {
01264     RCP<Node> node = getRangeMap()->getNode();
01265     //
01266     Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size());
01267     node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets);
01268     nodeRowOffsets = dev_nodeRowOffsets;
01269     //
01270     Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size());
01271     node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds);
01272     nodePackedInds = dev_nodePackedInds;
01273   }
01274   lclMatrix_->setPackedValues (pbuf_values1D_,
01275              getBlockRowMap()->getNodeFirstPointInBlocks_Device(),
01276              getBlockColMap()->getNodeFirstPointInBlocks_Device(),
01277              nodeRowOffsets,
01278              nodePackedInds,
01279              pbuf_indx_);
01280 }
01281 
01282 //-------------------------------------------------------------------
01283 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01284 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatVec()
01285 {
01286   //We insist that optimzeStorage has already been called.
01287   //We don't care whether this function (fillLocalMatVec()) is being
01288   //called for the first time or not.
01289   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01290     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01291 
01292   lclMatOps_.initializeValues (*lclMatrix_);
01293 }
01294 
01295 //-------------------------------------------------------------------
01296 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01297 void
01298 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::createImporterExporter()
01299 {
01300   typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap;
01301 
01302   const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap();
01303   const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap();
01304   const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap();
01305   const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap();
01306 
01307   if (!ptDomMap->isSameAs(*ptColMap)) {
01308     importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap));
01309   }
01310   if (!ptRngMap->isSameAs(*ptRowMap)) {
01311     exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap));
01312   }
01313 }
01314 
01315 //-------------------------------------------------------------------
01316 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01317 void
01318 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap)
01319 {
01320   if (isFillComplete()) return;
01321 
01322   globalAssemble();
01323 
01324   blkGraph_->fillComplete(blockDomainMap, blockRangeMap);
01325 
01326   createImporterExporter();
01327 
01328   is_fill_completed_ = true;
01329 
01330   optimizeStorage();
01331 
01332   fillLocalMatrix();
01333   fillLocalMatVec();
01334 }
01335 
01336 //-------------------------------------------------------------------
01337 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01338 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete()
01339 {
01340   //In this case, our block-row-map will also be our domain-map and
01341   //range-map.
01342 
01343   fillComplete(getBlockRowMap(), getBlockRowMap());
01344 }
01345 
01346 //-------------------------------------------------------------------
01347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01348 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::globalAssemble()
01349 {
01350   if (getPointRowMap()->getComm()->getSize() == 1) {
01351     return;
01352   }
01353 
01354   //nonlocal_data_ contains data that belongs on other processors.
01355   //We'll refer to this as overlapping data, and create an 'overlapMap'
01356   //that describes the layout of this overlapping data.
01357   Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
01358      overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap());
01359 
01360   if (overlapMap->getGlobalNumBlocks() == 0) {
01361     return;
01362   }
01363 
01364   //VbrDataDist is a wrapper that makes our overlapping data behave
01365   //like a DistObject so that the overlapping data can be exported
01366   //to the owning processors in a standard way.
01367   typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
01368   VDD vdd (nonlocal_data_, *overlapMap);
01369 
01370   //Create an exporter where the source map is our overlapMap and the
01371   //target map is the rowmap of our VbrMatrix.
01372   typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
01373   import_type importer (vdd.getMap (), convertBlockMapToPointMap (*getBlockRowMap ()));
01374 
01375   //Communicate the overlapping data to the owning processors and add it
01376   //into this VbrMatrix.
01377   this->doImport (vdd, importer, Tpetra::ADD);
01378 
01379   //Zero out the overlapping data so it can be re-populated and re-assembled
01380   //in future calls to globalAssemble.
01381   nonlocal_data_.zeroEntries ();
01382 }
01383 
01384 //-------------------------------------------------------------------
01385 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01386 std::string
01387 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::description() const
01388 {
01389   std::ostringstream oss;
01390   oss << Teuchos::Describable::description();
01391   if (isFillComplete()) {
01392     oss << "{status = fill complete, global num block rows = " 
01393   << getBlockRowMap()->getGlobalNumBlocks() << "}";
01394   }
01395   else {
01396     oss << "{status = fill not complete, global num block rows = " 
01397   << getBlockRowMap()->getGlobalNumBlocks() << "}";
01398   }
01399   return oss.str();
01400 }
01401 
01402 //-------------------------------------------------------------------
01403 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01404 void
01405 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
01406 {
01407   Teuchos::EVerbosityLevel vl = verbLevel;
01408   if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW;
01409 
01410   if (vl == Teuchos::VERB_NONE) return;
01411 
01412   Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm();
01413   const int myProc = comm->getRank();
01414 
01415   if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl;
01416 }
01417 
01418 }//namespace Tpetra
01419 
01420 //
01421 // Explicit instantiation macro
01422 //
01423 // Must be expanded from within the Tpetra namespace!
01424 //
01425 
01426 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01427   \
01428   template class VbrMatrix< SCALAR , LO , GO , NODE >;
01429 
01430 #endif //TPETRA_VBRMATRIX_DEF_HPP
01431 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines