Tpetra Matrix/Vector Services Version of the Day
Tpetra_VbrMatrix_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_VBRMATRIX_DEF_HPP
00043 #define TPETRA_VBRMATRIX_DEF_HPP
00044 
00045 #include <Tpetra_BlockMap.hpp>
00046 #include <Tpetra_BlockCrsGraph.hpp>
00047 #include <Tpetra_DistObject.hpp>
00048 #include <Tpetra_Vector.hpp>
00049 #include <Kokkos_NodeHelpers.hpp>
00050 #include <Kokkos_VbrMatrix.hpp>
00051 #include <Teuchos_SerialDenseMatrix.hpp>
00052 #include <algorithm>
00053 #include <sstream>
00054 
00055 #ifdef DOXYGEN_USE_ONLY
00056 #include "Tpetra_VbrMatrix_decl.hpp"
00057 #endif
00058 
00059 namespace Tpetra {
00060 
00061 template<class Scalar,class Node>
00062 void fill_device_ArrayRCP(Teuchos::RCP<Node>& node, Teuchos::ArrayRCP<Scalar>& ptr, Scalar value)
00063 {
00064   KokkosClassic::ReadyBufferHelper<Node> rbh(node);
00065   KokkosClassic::InitOp<Scalar> wdp;
00066   wdp.alpha = value;
00067   rbh.begin();
00068   wdp.x = rbh.addNonConstBuffer(ptr);
00069   rbh.end();
00070   node->template parallel_for<KokkosClassic::InitOp<Scalar> >(0, ptr.size(), wdp);
00071 }
00072 
00073 //-------------------------------------------------------------------
00074 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00075 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> > &blkRowMap, size_t maxNumEntriesPerRow, ProfileType pftype)
00076  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkRowMap)),
00077    blkGraph_(Teuchos::rcp(new BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(blkRowMap, maxNumEntriesPerRow, pftype))),
00078    constBlkGraph_(blkGraph_),
00079    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkRowMap->getNodeNumBlocks (),
00080                                                     blkRowMap->getPointMap()->getNode ()))),
00081    pbuf_values1D_(),
00082    pbuf_indx_(),
00083    lclMatOps_(blkRowMap->getPointMap()->getNode()),
00084    importer_(),
00085    exporter_(),
00086    importedVec_(),
00087    exportedVec_(),
00088    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00089    nonlocal_data_(),
00090    is_fill_completed_(false),
00091    is_storage_optimized_(false)
00092 {
00093   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00094   //each entry in the graph corresponds to a block-entry in the matrix.
00095   //That is, you can think of a VBR matrix as a Crs matrix of dense
00096   //submatrices...
00097 }
00098 
00099 //-------------------------------------------------------------------
00100 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00101 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::VbrMatrix(const Teuchos::RCP<const BlockCrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &blkGraph)
00102  : DistObject<char, LocalOrdinal, GlobalOrdinal, Node>(convertBlockMapToPointMap(*blkGraph->getBlockRowMap())),
00103    blkGraph_(),
00104    constBlkGraph_(blkGraph),
00105    lclMatrix_ (Teuchos::rcp (new local_matrix_type (blkGraph->getBlockRowMap ()->getNodeNumBlocks (),
00106                                                     blkGraph->getBlockRowMap ()->getPointMap ()->getNode ()))),
00107    pbuf_values1D_(),
00108    pbuf_indx_(),
00109    lclMatOps_(blkGraph->getBlockRowMap()->getPointMap()->getNode()),
00110    importer_(),
00111    exporter_(),
00112    importedVec_(),
00113    exportedVec_(),
00114    data_2D_(Teuchos::rcp(new Teuchos::Array<RowGlobalCols>)),
00115    nonlocal_data_(),
00116    is_fill_completed_(false),
00117    is_storage_optimized_(false)
00118 {
00119   //The graph of this VBR matrix is a BlockCrsGraph, which is a CrsGraph where
00120   //each entry in the graph corresponds to a block-entry in the matrix.
00121   //That is, you can think of a VBR matrix as a Crs matrix of dense
00122   //submatrices...
00123 
00124   TEUCHOS_TEST_FOR_EXCEPTION(blkGraph->isFillComplete() == false, std::runtime_error,
00125    "Tpetra::VbrMatrix::VbrMatrix(BlockCrsGraph) ERROR, this constructor requires graph.isFillComplete()==true.");
00126 
00127   createImporterExporter();
00128 
00129   is_fill_completed_ = true;
00130 
00131   optimizeStorage();
00132 
00133   fillLocalMatrix();
00134   fillLocalMatVec();
00135 }
00136 
00137 //-------------------------------------------------------------------
00138 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00139 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~VbrMatrix()
00140 {
00141 }
00142 
00143 //-------------------------------------------------------------------
00144 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00145 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00146 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const
00147 {
00148   return getBlockDomainMap()->getPointMap();
00149 }
00150 
00151 //-------------------------------------------------------------------
00152 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00153 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00154 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const
00155 {
00156   return getBlockRangeMap()->getPointMap();
00157 }
00158 
00159 //-------------------------------------------------------------------
00160 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00161 bool
00162 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::isFillComplete() const
00163 {
00164   return is_fill_completed_;
00165 }
00166 
00167 //-------------------------------------------------------------------
00168 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00169 template <class DomainScalar, class RangeScalar>
00170 void
00171 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00172 multiply (const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X,
00173           MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y,
00174           Teuchos::ETransp trans,
00175           RangeScalar alpha,
00176           RangeScalar beta) const
00177 {
00178   TEUCHOS_TEST_FOR_EXCEPTION(
00179     ! isFillComplete (), std::runtime_error,
00180     "Tpetra::VbrMatrix::multiply: This method may only be called after "
00181     "fillComplete has been called.");
00182 
00183   KokkosClassic::MultiVector<Scalar,Node> lclX = X.getLocalMV ();
00184   KokkosClassic::MultiVector<Scalar,Node> lclY = Y.getLocalMV ();
00185 
00186   lclMatOps_.template multiply<Scalar,Scalar> (trans, alpha, lclX, beta, lclY);
00187 }
00188 
00189 //-------------------------------------------------------------------
00190 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00191 template<class DomainScalar, class RangeScalar>
00192 void
00193 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00194 solve (const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& Y,
00195        MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& X,
00196        Teuchos::ETransp trans) const
00197 {
00198   TEUCHOS_TEST_FOR_EXCEPTION(X.isConstantStride() == false || Y.isConstantStride() == false, std::runtime_error,
00199         "Tpetra::VbrMatrix::solve(X,Y): X and Y must be constant stride.");
00200 
00201   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00202     "Tpetra::VbrMatrix::solve ERROR, solve may only be called after fillComplete has been called.");
00203 
00204   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isUpperTriangular()==false && constBlkGraph_->isLowerTriangular()==false, std::runtime_error,
00205     "Tpetra::VbrMatrix::solve ERROR, matrix must be either upper or lower triangular.");
00206 
00207   KokkosClassic::MultiVector<RangeScalar,Node> lclY = Y.getLocalMV ();
00208   KokkosClassic::MultiVector<DomainScalar,Node> lclX = X.getLocalMV ();
00209 
00210   const Teuchos::EUplo triang =
00211     constBlkGraph_->isUpperTriangular () ?
00212     Teuchos::UPPER_TRI :
00213     Teuchos::LOWER_TRI;
00214   const Teuchos::EDiag diag =
00215     (constBlkGraph_->getNodeNumBlockDiags () < constBlkGraph_->getNodeNumBlockRows ()) ?
00216     Teuchos::UNIT_DIAG :
00217     Teuchos::NON_UNIT_DIAG;
00218 
00219   lclMatOps_.template solve<DomainScalar,RangeScalar> (trans, triang, diag, lclY, lclX);
00220 }
00221 
00222 //-------------------------------------------------------------------
00223 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00224 void
00225 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateImport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X) const
00226 {
00227   if (importer_ != Teuchos::null) {
00228     if (importedVec_ == Teuchos::null || importedVec_->getNumVectors() != X.getNumVectors()) {
00229       importedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), X.getNumVectors()));
00230     }
00231 
00232     importedVec_->doImport(X, *importer_, REPLACE);
00233   }
00234 }
00235 
00236 //-------------------------------------------------------------------
00237 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00238 void
00239 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::updateExport(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
00240 {
00241   if (exporter_ != Teuchos::null) {
00242     if (exportedVec_ == Teuchos::null || exportedVec_->getNumVectors() != Y.getNumVectors()) {
00243       exportedVec_ = Teuchos::rcp(new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(getBlockColMap()->getPointMap(), Y.getNumVectors()));
00244     }
00245 
00246     exportedVec_->doExport(Y, *exporter_, REPLACE);
00247   }
00248 }
00249 
00250 //-------------------------------------------------------------------
00251 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00252 void
00253 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
00254          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00255                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00256                Teuchos::ETransp trans,
00257                Scalar alpha,
00258                Scalar beta) const
00259 {
00260   if (trans == Teuchos::NO_TRANS) {
00261     updateImport(X);
00262     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00263     this->template multiply<Scalar,Scalar>(Xref, Y, trans, alpha, beta);
00264   }
00265   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00266     updateImport(Y);
00267     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00268     this->template multiply<Scalar,Scalar>(X, Yref, trans, alpha, beta);
00269     if (importedVec_ != Teuchos::null) {
00270       Y.doExport(*importedVec_, *importer_, ADD);
00271     }
00272   }
00273 }
00274 
00275 //-------------------------------------------------------------------
00276 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00277 void
00278 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyInverse(
00279          const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
00280                MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
00281                Teuchos::ETransp trans) const
00282 {
00283   if (trans == Teuchos::NO_TRANS) {
00284     updateImport(Y);
00285     MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xref = importedVec_ == Teuchos::null ? X : *importedVec_;
00286     this->template solve<Scalar,Scalar>(Y, Xref, trans);
00287   }
00288   else if (trans == Teuchos::TRANS || trans == Teuchos::CONJ_TRANS) {
00289     updateImport(Y);
00290     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Yref = importedVec_ == Teuchos::null ? Y : *importedVec_;
00291     this->template solve<Scalar,Scalar>(Yref, X, trans);
00292     if (importedVec_ != Teuchos::null) {
00293       X.doExport(*importedVec_, *importer_, ADD);
00294     }
00295   }
00296 }
00297 
00298 //-------------------------------------------------------------------
00299 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00300 bool
00301 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const
00302 {
00303   return true;
00304 }
00305 
00306 //-------------------------------------------------------------------
00307 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00308 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00309 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRowMap() const
00310 {
00311   return constBlkGraph_->getBlockRowMap();
00312 }
00313 
00314 //-------------------------------------------------------------------
00315 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00316 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00317 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointRowMap() const
00318 {
00319   return constBlkGraph_->getBlockRowMap()->getPointMap();
00320 }
00321 
00322 //-------------------------------------------------------------------
00323 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00324 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00325 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockColMap() const
00326 {
00327   return constBlkGraph_->getBlockColMap();
00328 }
00329 
00330 //-------------------------------------------------------------------
00331 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00332 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00333 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockDomainMap() const
00334 {
00335   return constBlkGraph_->getBlockDomainMap();
00336 }
00337 
00338 //-------------------------------------------------------------------
00339 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00340 Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
00341 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getBlockRangeMap() const
00342 {
00343   return constBlkGraph_->getBlockRangeMap();
00344 }
00345 
00346 //-------------------------------------------------------------------
00347 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00348 Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >
00349 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getPointColMap() const
00350 {
00351   return constBlkGraph_->getBlockColMap()->getPointMap();
00352 }
00353 
00354 //-------------------------------------------------------------------
00355 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00356 void
00357 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00358 getGlobalBlockRowView (GlobalOrdinal globalBlockRow,
00359                        LocalOrdinal& numPtRows,
00360                        Teuchos::ArrayView<const GlobalOrdinal>& blockCols,
00361                        Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00362                        Teuchos::Array<Teuchos::ArrayRCP<const Scalar> >& blockEntries) const
00363 {
00364   TEUCHOS_TEST_FOR_EXCEPTION(
00365     isFillComplete(), std::runtime_error,
00366     "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, "
00367     "isFillComplete() is required to be false.");
00368 
00369   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00370 
00371   LocalOrdinal localRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00372   numPtRows = getBlockRowMap()->getLocalBlockSize(localRow);
00373   constBlkGraph_->getGlobalBlockRowView(globalBlockRow, blockCols);
00374   ptColsPerBlockCol.resize(blockCols.size());
00375   blockEntries.resize(blockCols.size());
00376   for (Tsize_t i = 0; i < blockCols.size (); ++i) {
00377     getGlobalBlockEntryView (globalBlockRow, blockCols[i],
00378                              numPtRows, ptColsPerBlockCol[i], blockEntries[i]);
00379   }
00380 }
00381 
00382 //-------------------------------------------------------------------
00383 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00384 void
00385 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockRowView(
00386          LocalOrdinal localBlockRow,
00387          LocalOrdinal& numPtRows,
00388          Teuchos::ArrayView<const LocalOrdinal>& blockCols,
00389          Teuchos::Array<LocalOrdinal>& ptColsPerBlockCol,
00390          Teuchos::ArrayRCP<const Scalar>& blockEntries) const
00391 {
00392   TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00393       "Tpetra::VbrMatrix::getGlobalBlockRowView internal ERROR, isFillComplete() is required to be true.");
00394 
00395   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00396   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00397   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00398 
00399   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00400   constBlkGraph_->getLocalBlockRowView(localBlockRow, blockCols);
00401   ptColsPerBlockCol.resize(blockCols.size());
00402   LocalOrdinal num_scalars_in_block_row = 0;
00403   for(Tsize_t i=0; i<blockCols.size(); ++i) {
00404     ptColsPerBlockCol[i] = getBlockColMap()->getLocalBlockSize(blockCols[i]);
00405     num_scalars_in_block_row += numPtRows*ptColsPerBlockCol[i];
00406   }
00407 
00408   Teuchos::RCP<Node> node = getNode();
00409   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00410   const size_t bindx_offset = bpr[localBlockRow];
00411   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bindx_offset);
00412   const LocalOrdinal offset = indx[0];
00413   blockEntries = node->template viewBuffer<Scalar>(num_scalars_in_block_row, pbuf_values1D_+offset);
00414 }
00415 
00416 //-------------------------------------------------------------------
00417 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00418 void
00419 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryViewNonConst(
00420     GlobalOrdinal globalBlockRow,
00421     GlobalOrdinal globalBlockCol,
00422     LocalOrdinal& numPtRows,
00423     LocalOrdinal& numPtCols,
00424     Teuchos::ArrayRCP<Scalar>& blockEntry)
00425 {
00426   //Return a non-const, read-write view of a block-entry (as an ArrayRCP),
00427   //creating/allocating the block-entry if it doesn't already exist, (but only
00428   //if fillComplete hasn't been called yet, and if the arguments numPtRows
00429   //and numPtCols have been set on input).
00430 
00431   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00432 
00433   if (localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid()) {
00434     VbrUtils::getGlobalBlockEntryViewNonConst(nonlocal_data_,
00435                    globalBlockRow, globalBlockCol,
00436                    numPtRows, numPtCols, blockEntry);
00437     return;
00438   }
00439 
00440   if (is_storage_optimized_) {
00441     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00442       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst internal ERROR, storage is optimized but isFillComplete() is false.");
00443 
00444     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00445 
00446     getLocalBlockEntryViewNonConst(localBlockRow, localBlockCol,
00447                                    numPtRows, numPtCols, blockEntry);
00448     return;
00449   }
00450 
00451   //If we get to here, fillComplete hasn't been called yet, and the matrix data
00452   //is stored in un-packed '2D' form.
00453 
00454   if (data_2D_->size() == 0) {
00455     LocalOrdinal numBlockRows = constBlkGraph_->getNodeNumBlockRows();
00456     data_2D_->resize(numBlockRows);
00457   }
00458 
00459   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00460 
00461   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00462 
00463   if (col_iter != blkrow.end()) {
00464     blockEntry = col_iter->second;
00465   }
00466   else {
00467     //blockEntry doesn't already exist, so we will create it.
00468 
00469     //make sure block-size is specified:
00470     TEUCHOS_TEST_FOR_EXCEPTION(numPtRows==0 || numPtCols==0, std::runtime_error,
00471       "Tpetra::VbrMatrix::getGlobalBlockEntryViewNonConst ERROR: creating block-entry, but numPtRows and/or numPtCols is 0.");
00472 
00473     Teuchos::RCP<Node> node = getNode();
00474     size_t blockSize = numPtRows*numPtCols;
00475     blockEntry = Teuchos::arcp(new Scalar[blockSize], 0, blockSize);
00476     std::fill(blockEntry.begin(), blockEntry.end(), 0);
00477     blkrow.insert(std::make_pair(globalBlockCol, blockEntry));
00478     blkGraph_->insertGlobalIndices(globalBlockRow, Teuchos::ArrayView<GlobalOrdinal>(&globalBlockCol, 1));
00479   }
00480 }
00481 
00482 //-------------------------------------------------------------------
00483 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00484 void
00485 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getGlobalBlockEntryView(
00486       GlobalOrdinal globalBlockRow,
00487       GlobalOrdinal globalBlockCol,
00488       LocalOrdinal& numPtRows,
00489       LocalOrdinal& numPtCols,
00490       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00491 {
00492   //This method returns a const-view of a block-entry (as an ArrayRCP).
00493   //Throws an exception if the block-entry doesn't already exist.
00494 
00495   if (is_storage_optimized_) {
00496     TEUCHOS_TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error,
00497       "Tpetra::VbrMatrix::getGlobalBlockEntryView internal ERROR, storage is optimized but isFillComplete() is false.");
00498 
00499     LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00500     LocalOrdinal localBlockCol = getBlockColMap()->getLocalBlockID(globalBlockCol);
00501     getLocalBlockEntryView(localBlockRow, localBlockCol, numPtRows, numPtCols, blockEntry);
00502     return;
00503   }
00504 
00505   TEUCHOS_TEST_FOR_EXCEPTION(data_2D_->size() == 0, std::runtime_error,
00506     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, matrix storage not yet allocated, can't return a const view.");
00507 
00508   //this acts as a range-check for globalBlockRow:
00509   LocalOrdinal localBlockRow = getBlockRowMap()->getLocalBlockID(globalBlockRow);
00510   TEUCHOS_TEST_FOR_EXCEPTION( localBlockRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
00511      std::runtime_error,
00512      "Tpetra::VbrMatrix::getGlobalBlockEntryView, globalBlockRow not on the local processor.");
00513 
00514   RowGlobalCols& blkrow = (*data_2D_)[localBlockRow];
00515   typename RowGlobalCols::iterator col_iter = blkrow.find(globalBlockCol);
00516 
00517   if (col_iter == blkrow.end()) {
00518     throw std::runtime_error("Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, specified block-entry doesn't exist.");
00519   }
00520 
00521   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00522 
00523   TEUCHOS_TEST_FOR_EXCEPTION(numPtRows == 0, std::runtime_error,
00524     "Tpetra::VbrMatrix::getGlobalBlockEntryView ERROR, numPtRows == 0.");
00525 
00526   blockEntry = col_iter->second;
00527   numPtCols = blockEntry.size() / numPtRows;
00528 }
00529 
00530 //-------------------------------------------------------------------
00531 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00532 void
00533 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryViewNonConst(
00534     LocalOrdinal localBlockRow,
00535     LocalOrdinal localBlockCol,
00536     LocalOrdinal& numPtRows,
00537     LocalOrdinal& numPtCols,
00538     Teuchos::ArrayRCP<Scalar>& blockEntry)
00539 {
00540   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00541   typedef Teuchos::ArrayRCP<const size_t> Host_View;
00542   typedef typename Host_View_LO::iterator ITER;
00543   //This method returns a non-constant view of a block-entry (as an ArrayRCP).
00544 
00545   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00546    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called after fillComplete() has been called.");
00547 
00548   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00549    "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, this method can only be called if storage is optimized.");
00550 
00551   Teuchos::RCP<Node> node = getNode();
00552 
00553   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
00554   LocalOrdinal bindx_offset = bptr[localBlockRow];
00555   LocalOrdinal length = bptr[localBlockRow+1] - bindx_offset;
00556 
00557   TEUCHOS_TEST_FOR_EXCEPTION( length < 1, std::runtime_error,
00558     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found in localBlockRow.");
00559 
00560   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00561   ITER bindx_beg = bindx.begin() + bindx_offset,
00562        bindx_end = bindx_beg + length;
00563   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00564 
00565   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00566     "Tpetra::VbrMatrix::getLocalBlockEntryViewNonConst ERROR, specified localBlockCol not found.");
00567 
00568   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00569   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00570 
00571   const LocalOrdinal blkSize = numPtRows*numPtCols;
00572   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1,pbuf_indx_+bptr[localBlockRow]+(it-bindx_beg));
00573   const LocalOrdinal offset = indx[0];
00574   blockEntry = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite, blkSize, pbuf_values1D_ + offset);
00575 }
00576 
00577 //-------------------------------------------------------------------
00578 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00579 void
00580 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalDiagCopy(
00581   Tpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& diag) const
00582 {
00583   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = getBlockRowMap();
00584   TEUCHOS_TEST_FOR_EXCEPTION(diag.getMap()->isSameAs(*(rowmap->getPointMap())) != true,
00585     std::runtime_error, "Tpetra::VbrMatrix::getLocalDiagCopy ERROR, vector must be distributed the same as this matrix' row-map.");
00586 
00587   Teuchos::ArrayRCP<Scalar> diag_view = diag.get1dViewNonConst();
00588   Teuchos::ArrayView<const GlobalOrdinal> blockIDs = rowmap->getNodeBlockIDs();
00589   size_t offset = 0;
00590   typedef typename Teuchos::ArrayView<const GlobalOrdinal>::size_type Tsize_t;
00591   for(Tsize_t i=0; i<blockIDs.size(); ++i) {
00592     LocalOrdinal localBlockID = rowmap->getLocalBlockID(blockIDs[i]);
00593     LocalOrdinal blockSize = rowmap->getLocalBlockSize(localBlockID);
00594     Teuchos::ArrayRCP<const Scalar> blockEntry;
00595     getGlobalBlockEntryView(blockIDs[i], blockIDs[i], blockSize, blockSize, blockEntry);
00596 
00597     for(LocalOrdinal j=0; j<blockSize; ++j) {
00598       diag_view[offset++] = blockEntry[j*blockSize+j];
00599     }
00600   }
00601 }
00602 
00603 //-------------------------------------------------------------------
00604 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00605 bool
00606 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00607 checkSizes (const SrcDistObject& source)
00608 {
00609   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00610   const this_type* srcVbrMat = dynamic_cast<const this_type*> (&source);
00611 
00612   if (srcVbrMat == NULL) {
00613     typedef VbrUtils::VbrDataDist<LocalOrdinal,GlobalOrdinal,Scalar,Node> VDD;
00614     const VDD* vdd = const_cast<VDD*> (dynamic_cast<const VDD*> (&source));
00615     if (vdd == NULL) {
00616       return false;
00617     } else {
00618       return (this->getMap ()->getMinAllGlobalIndex () <=
00619               vdd->getMap ()->getMinAllGlobalIndex ()) &&
00620         (this->getMap ()->getMaxAllGlobalIndex () >=
00621          vdd->getMap ()->getMaxAllGlobalIndex ());
00622     }
00623   } else {
00624     return (this->getMap ()->getMinAllGlobalIndex () <=
00625             srcVbrMat->getMap ()->getMinAllGlobalIndex ()) &&
00626       (this->getMap ()->getMaxAllGlobalIndex () >=
00627        srcVbrMat->getMap ()->getMaxAllGlobalIndex ());
00628   }
00629 }
00630 
00631 //-------------------------------------------------------------------
00632 template<class Scalar,
00633          class LocalOrdinal,
00634          class GlobalOrdinal,
00635          class Node,
00636          class LocalMatOps>
00637 void
00638 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00639 copyAndPermute (const SrcDistObject& source,
00640                 size_t numSameIDs,
00641                 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
00642                 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
00643 {
00644   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00645   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00646   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00647 
00648   if (src_mat_ptr == NULL) {
00649     throw std::runtime_error ("VbrMatrix::copyAndPermute ERROR, dynamic_cast failed.");
00650   }
00651   const this_type& src_mat = *src_mat_ptr;
00652 
00653   Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00654   LocalOrdinal numPtRows;
00655 
00656   LocalOrdinal numSame = numSameIDs;
00657   for(LocalOrdinal i=0; i<numSame; ++i) {
00658     if (isFillComplete()) {
00659       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00660       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00661 
00662       src_mat.getLocalBlockRowView (i, numPtRows, src_blkCols,
00663                                     src_ptColsPerBlkCol, src_blkEntries);
00664       unsigned offset = 0;
00665       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00666         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00667         setLocalBlockEntry (i, src_blkCols[j], numPtRows, numPtCols, numPtCols,
00668                             src_blkEntries.view (offset, numPtRows*numPtCols));
00669         offset += numPtRows*numPtCols;
00670       }
00671     }
00672     else {
00673       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (i);
00674       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00675       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00676 
00677       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00678                                      src_ptColsPerBlkCol, src_blkEntries);
00679       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00680         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00681         setGlobalBlockEntry (gRow, src_blkCols[j],
00682                              numPtRows, numPtCols, numPtRows,
00683                              src_blkEntries[j].view (0, numPtRows * numPtCols));
00684       }
00685     }
00686   }
00687 
00688   for (Tsize_t i = 0; i < permuteToLIDs.size (); ++i) {
00689     if (isFillComplete ()) {
00690       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00691       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00692 
00693       src_mat.getLocalBlockRowView (permuteFromLIDs[i], numPtRows,
00694                                     src_blkCols, src_ptColsPerBlkCol,
00695                                     src_blkEntries);
00696       unsigned offset = 0;
00697       for (Tsize_t j=0; j<src_blkCols.size(); ++j) {
00698         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00699         setLocalBlockEntry (permuteToLIDs[i], src_blkCols[j], numPtRows, numPtCols, numPtCols,
00700                             src_blkEntries.view(offset, numPtRows*numPtCols));
00701         offset += numPtRows*numPtCols;
00702       }
00703     }
00704     else {
00705       GlobalOrdinal gRow = getBlockRowMap ()->getGlobalBlockID (permuteToLIDs[i]);
00706       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00707       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00708 
00709       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00710                                      src_ptColsPerBlkCol, src_blkEntries);
00711       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00712         LocalOrdinal numPtCols = src_ptColsPerBlkCol[j];
00713         setGlobalBlockEntry (gRow, src_blkCols[j],
00714                              numPtRows, numPtCols, numPtRows,
00715                              src_blkEntries[j].view (0, numPtRows * numPtCols));
00716       }
00717     }
00718   }
00719 }
00720 
00721 //-------------------------------------------------------------------
00722 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00723 void
00724 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00725 packAndPrepare (const SrcDistObject& source,
00726                 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
00727                 Teuchos::Array<char>& exports,
00728                 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00729                 size_t& constantNumPackets,
00730                 Distributor& distor)
00731 {
00732   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00733   typedef VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> this_type;
00734   const this_type* src_mat_ptr = dynamic_cast<const this_type*> (&source);
00735 
00736   if (src_mat_ptr == NULL) {
00737     typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
00738     const VDD* vdd = dynamic_cast<const VDD*> (&source);
00739     TEUCHOS_TEST_FOR_EXCEPTION(
00740       vdd == NULL, std::invalid_argument, "Tpetra::VbrMatrix::packAndPrepare: "
00741       "Source object input must be either a VbrMatrix or a VbrDataDist with "
00742       "matching template parameters.");
00743     vdd->pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
00744     return;
00745   }
00746   const this_type& src_mat = *src_mat_ptr;
00747 
00748   //We will pack each row's data into the exports buffer as follows:
00749   //[num-block-cols,numPtRows,{list-of-blk-cols},{list-of-ptColsPerBlkCol},{all vals}]
00750   //so the length of the char exports buffer for a row is:
00751   //sizeof(LocalOrdinal)*(2+2*(num-block-cols)) + sizeof(Scalar)*numPtRows*sum(numPtCols_i)
00752 
00753   //For each row corresponding to exportLIDs, accumulate the size that it will
00754   //occupy in the exports buffer:
00755   size_t total_exports_size = 0;
00756   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00757     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00758     LocalOrdinal numPtRows;
00759     LocalOrdinal numBlkCols = 0;
00760     LocalOrdinal numScalars = 0;
00761 
00762     if (src_mat.isFillComplete ()) {
00763       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00764       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00765       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows,
00766                                     src_blkCols, src_ptColsPerBlkCol,
00767                                     src_blkEntries);
00768       numBlkCols = src_blkCols.size ();
00769       numScalars = src_blkEntries.size ();
00770     }
00771     else {
00772       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00773       GlobalOrdinal gRow =
00774         src_mat.getBlockRowMap ()->getGlobalBlockID (exportLIDs[i]);
00775       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00776       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00777                                      src_ptColsPerBlkCol, src_blkEntries);
00778       numBlkCols = src_blkCols.size ();
00779       for (Tsize_t j = 0; j < src_blkEntries.size (); ++j) {
00780         numScalars += src_blkEntries[j].size ();
00781       }
00782     }
00783 
00784     const size_t size_for_this_row =
00785       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols) +
00786       sizeof (Scalar) * numScalars;
00787     numPacketsPerLID[i] = size_for_this_row;
00788     total_exports_size += size_for_this_row;
00789   }
00790 
00791   exports.resize (total_exports_size);
00792 
00793   ArrayView<char> avIndsC, avValsC;
00794   ArrayView<Scalar>        avVals;
00795 
00796   size_t offset = 0;
00797 
00798   for (Tsize_t i = 0; i < exportLIDs.size (); ++i) {
00799     Teuchos::Array<LocalOrdinal> src_ptColsPerBlkCol;
00800     LocalOrdinal numPtRows;
00801     LocalOrdinal numBlkCols = 0;
00802     LocalOrdinal numScalars = 0;
00803 
00804     if (src_mat.isFillComplete ()) {
00805       Teuchos::ArrayRCP<const Scalar> src_blkEntries;
00806       Teuchos::ArrayView<const LocalOrdinal> src_blkCols;
00807       src_mat.getLocalBlockRowView (exportLIDs[i], numPtRows, src_blkCols,
00808                                     src_ptColsPerBlkCol, src_blkEntries);
00809       numBlkCols = src_blkCols.size ();
00810       numScalars = src_blkEntries.size ();
00811 
00812       LocalOrdinal num_chars_for_ordinals =
00813         (2*numBlkCols + 2) * sizeof (LocalOrdinal);
00814       //get export views
00815       avIndsC = exports (offset, num_chars_for_ordinals);
00816       avValsC = exports (offset + num_chars_for_ordinals, numScalars * sizeof (Scalar));
00817       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00818       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00819 
00820       const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& col_map =
00821         src_mat.getBlockColMap ();
00822 
00823       //put row info into the buffer views:
00824       *ind_it++ = numBlkCols;
00825       *ind_it++ = numPtRows;
00826       for (Tsize_t j = 0; j < src_blkCols.size (); ++j) {
00827         *ind_it++ = col_map->getGlobalBlockID (src_blkCols[j]);
00828       }
00829       std::copy (src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00830       avVals = av_reinterpret_cast<Scalar> (avValsC);
00831       std::copy (src_blkEntries.begin(), src_blkEntries.end(), avVals.begin());
00832     }
00833     else {
00834       Teuchos::Array<Teuchos::ArrayRCP<const Scalar> > src_blkEntries;
00835       GlobalOrdinal gRow = src_mat.getBlockRowMap()->getGlobalBlockID(exportLIDs[i]);
00836       Teuchos::ArrayView<const GlobalOrdinal> src_blkCols;
00837       src_mat.getGlobalBlockRowView (gRow, numPtRows, src_blkCols,
00838                                      src_ptColsPerBlkCol, src_blkEntries);
00839       numBlkCols = src_blkCols.size();
00840       numScalars = 0;
00841       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00842         numScalars += numPtRows*src_ptColsPerBlkCol[j];
00843       }
00844 
00845       LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00846       //get export views
00847       avIndsC = exports(offset, num_chars_for_ordinals);
00848       avValsC = exports(offset+ num_chars_for_ordinals, numScalars*sizeof(Scalar));
00849       ArrayView<GlobalOrdinal> avInds = av_reinterpret_cast<GlobalOrdinal>(avIndsC);
00850       typename ArrayView<GlobalOrdinal>::iterator ind_it = avInds.begin();
00851 
00852       //put row info into the buffer views:
00853       *ind_it++ = numBlkCols;
00854       *ind_it++ = numPtRows;
00855       std::copy(src_blkCols.begin(), src_blkCols.end(), ind_it);
00856       ind_it += src_blkCols.size();
00857       std::copy(src_ptColsPerBlkCol.begin(), src_ptColsPerBlkCol.end(), ind_it);
00858       avVals = av_reinterpret_cast<Scalar>(avValsC);
00859       typename ArrayView<Scalar>::iterator val_it = avVals.begin();
00860       for(Tsize_t j=0; j<src_blkEntries.size(); ++j) {
00861         std::copy(src_blkEntries[j].begin(), src_blkEntries[j].end(), val_it);
00862         val_it += src_blkEntries[j].size();
00863       }
00864     }
00865 
00866     const size_t size_for_this_row =
00867       sizeof (GlobalOrdinal) * (2 + 2*numBlkCols)
00868       + sizeof (Scalar) * numScalars;
00869     offset += size_for_this_row;
00870   }
00871 
00872   constantNumPackets = 0;
00873 }
00874 
00875 //-------------------------------------------------------------------
00876 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00877 void
00878 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::
00879 unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
00880                   const Teuchos::ArrayView<const char>& imports,
00881                   const Teuchos::ArrayView<size_t>& numPacketsPerLID,
00882                   size_t constantNumPackets,
00883                   Distributor& distor,
00884                   CombineMode CM)
00885 {
00886   typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type Tsize_t;
00887 
00888   if (CM == Tpetra::ABSMAX) {
00889     std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00890   }
00891 
00892   size_t offset = 0;
00893   for(Tsize_t i=0; i<importLIDs.size(); ++i) {
00894     ArrayView<const char> avC = imports.view(offset, numPacketsPerLID[i]);
00895     ArrayView<const GlobalOrdinal> avOrds = av_reinterpret_cast<const GlobalOrdinal>(avC);
00896     GlobalOrdinal gRow = this->getBlockRowMap()->getGlobalBlockID(importLIDs[i]);
00897     size_t avOrdsOffset = 0;
00898     LocalOrdinal numBlkCols = avOrds[avOrdsOffset++];
00899     LocalOrdinal numPtRows = avOrds[avOrdsOffset++];
00900     LocalOrdinal num_chars_for_ordinals = (2*numBlkCols+2)*sizeof(GlobalOrdinal);
00901     ArrayView<const char> avValsC = imports.view(offset+num_chars_for_ordinals, numPacketsPerLID[i]-num_chars_for_ordinals);
00902     ArrayView<const Scalar> avVals = av_reinterpret_cast<const Scalar>(avValsC);
00903 
00904     size_t avValsOffset = 0;
00905     for(Tsize_t j=0; j<numBlkCols; ++j) {
00906       GlobalOrdinal blkCol = avOrds[avOrdsOffset+j];
00907       LocalOrdinal numPtCols = avOrds[avOrdsOffset+numBlkCols+j];
00908       LocalOrdinal LDA = numPtRows;
00909       if (CM == Tpetra::ADD) {
00910         sumIntoGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00911                          avVals.view(avValsOffset, numPtRows*numPtCols));
00912       }
00913       else if (CM == Tpetra::INSERT || CM == Tpetra::REPLACE) {
00914         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00915                            avVals.view(avValsOffset, numPtRows*numPtCols));
00916       }
00917       else {
00918 //    std::cout << "Warning, VbrMatrix Import/Export doesn't support combine-mode==ABSMAX; use ADD, INSERT or REPLACE. (REPLACE is being used now.)"<<std::endl;
00919         setGlobalBlockEntry(gRow, blkCol, numPtRows, numPtCols, LDA,
00920                            avVals.view(avValsOffset, numPtRows*numPtCols));
00921       }
00922       avValsOffset += numPtRows*numPtCols;
00923     }
00924   }
00925 }
00926 
00927 //-------------------------------------------------------------------
00928 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00929 void
00930 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getLocalBlockEntryView(
00931       LocalOrdinal localBlockRow,
00932       LocalOrdinal localBlockCol,
00933       LocalOrdinal& numPtRows,
00934       LocalOrdinal& numPtCols,
00935       Teuchos::ArrayRCP<const Scalar>& blockEntry) const
00936 {
00937   typedef Teuchos::ArrayRCP<const size_t>       Host_View;
00938   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
00939   typedef typename Host_View_LO::iterator ITER;
00940   //This method returns a constant view of a block-entry (as an ArrayRCP).
00941 
00942   TEUCHOS_TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
00943    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called after fillComplete() has been called.");
00944 
00945   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ == false, std::runtime_error,
00946    "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, this method can only be called if storage is optimized.");
00947 
00948   Teuchos::RCP<Node> node = getNode();
00949 
00950   Host_View bpr = constBlkGraph_->getNodeRowOffsets();
00951   const size_t bindx_offset = bpr[localBlockRow];
00952   const size_t length       = bpr[localBlockRow+1] - bindx_offset;
00953 
00954   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
00955   ITER bindx_beg = bindx.begin()+bindx_offset,
00956        bindx_end = bindx_beg + length;
00957   ITER it = std::lower_bound(bindx_beg, bindx_end, localBlockCol);
00958 
00959   TEUCHOS_TEST_FOR_EXCEPTION(it == bindx_end || *it != localBlockCol, std::runtime_error,
00960     "Tpetra::VbrMatrix::getLocalBlockEntryView ERROR, specified localBlockCol not found.");
00961 
00962   numPtRows = getBlockRowMap()->getLocalBlockSize(localBlockRow);
00963   numPtCols = getBlockColMap()->getLocalBlockSize(localBlockCol);
00964 
00965   const LocalOrdinal blkSize = numPtRows*numPtCols;
00966   Host_View_LO indx = node->template viewBuffer<LocalOrdinal>(1, pbuf_indx_+bpr[localBlockRow]+(it-bindx_beg));
00967   const LocalOrdinal offset = indx[0];
00968   blockEntry = node->template viewBuffer<Scalar>(blkSize, pbuf_values1D_ + offset);
00969 }
00970 
00971 //-------------------------------------------------------------------
00972 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00973 Teuchos::RCP<Node>
00974 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getNode() const
00975 {
00976   return getBlockRowMap()->getPointMap()->getNode();
00977 }
00978 
00979 //-------------------------------------------------------------------
00980 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00981 void
00982 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::putScalar(Scalar s)
00983 {
00984   Teuchos::RCP<Node> node = getNode();
00985 
00986   if (isFillComplete()) {
00987     fill_device_ArrayRCP(node, pbuf_values1D_, s);
00988   }
00989   else {
00990     typedef typename Teuchos::Array<RowGlobalCols>::size_type Tsize_t;
00991     Teuchos::Array<RowGlobalCols>& rows = *data_2D_;
00992     Tsize_t numBlockRows = rows.size();
00993     for(Tsize_t r=0; r<numBlockRows; ++r) {
00994       typename RowGlobalCols::iterator
00995         col_iter = rows[r].begin(),
00996         col_end  = rows[r].end();
00997       for(; col_iter != col_end; ++col_iter) {
00998         Teuchos::ArrayRCP<Scalar>& vals = col_iter->second;
00999         std::fill(vals.begin(), vals.end(), s);
01000       }
01001     }
01002   }
01003 }
01004 
01005 //-------------------------------------------------------------------
01006 template<class ArrayType1,class ArrayType2,class Ordinal>
01007 void set_array_values(ArrayType1& dest, ArrayType2& src, Ordinal rows, Ordinal cols, Ordinal stride, Tpetra::CombineMode mode)
01008 {
01009   size_t offset = 0;
01010   size_t src_offset = 0;
01011 
01012   if (mode == ADD) {
01013     for(Ordinal col=0; col<cols; ++col) {
01014       for(Ordinal row=0; row<rows; ++row) {
01015         dest[offset++] += src[src_offset + row];
01016       }
01017       src_offset += stride;
01018     }
01019   }
01020   else {
01021     for(Ordinal col=0; col<cols; ++col) {
01022       for(Ordinal row=0; row<rows; ++row) {
01023         dest[offset++] = src[src_offset + row];
01024       }
01025       src_offset += stride;
01026     }
01027   }
01028 }
01029 
01030 //-------------------------------------------------------------------
01031 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01032 void
01033 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01034 {
01035   //first get an ArrayRCP for the internal storage for this block-entry:
01036   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01037   LocalOrdinal blkRowSize = blockEntry.numRows();
01038   LocalOrdinal blkColSize = blockEntry.numCols();
01039   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01040 
01041   //now copy the incoming block-entry into internal storage:
01042   const Scalar* inputvalues = blockEntry.values();
01043   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal >(
01044     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01045 }
01046 
01047 //-------------------------------------------------------------------
01048 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01049 void
01050 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01051 {
01052   //first get an ArrayRCP for the internal storage for this block-entry:
01053   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01054   LocalOrdinal blkRowSize = blockEntry.numRows();
01055   LocalOrdinal blkColSize = blockEntry.numCols();
01056   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01057 
01058   //now copy the incoming block-entry into internal storage:
01059   const Scalar* inputvalues = blockEntry.values();
01060   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01061 internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), REPLACE);
01062 }
01063 
01064 //-------------------------------------------------------------------
01065 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01066 void
01067 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01068 {
01069   //first get an ArrayRCP for the internal storage for this block-entry:
01070   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01071   LocalOrdinal blkRowSize = blockEntry.numRows();
01072   LocalOrdinal blkColSize = blockEntry.numCols();
01073   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01074 
01075   //now sum (add) the incoming block-entry into internal storage:
01076   const Scalar* inputvalues = blockEntry.values();
01077   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01078     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01079 }
01080 
01081 //-------------------------------------------------------------------
01082 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01083 void
01084 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, const Teuchos::SerialDenseMatrix<int,Scalar>& blockEntry)
01085 {
01086   //first get an ArrayRCP for the internal storage for this block-entry:
01087   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01088   LocalOrdinal blkRowSize = blockEntry.numRows();
01089   LocalOrdinal blkColSize = blockEntry.numCols();
01090   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01091 
01092   //now sum (add) the incoming block-entry into internal storage:
01093   const Scalar* inputvalues = blockEntry.values();
01094   set_array_values<Teuchos::ArrayRCP<Scalar>, const Scalar*, LocalOrdinal>(
01095     internalBlockEntry, inputvalues, blkRowSize, blkColSize, blockEntry.stride(), ADD);
01096 }
01097 
01098 //-------------------------------------------------------------------
01099 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01100 void
01101 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01102 {
01103   //first get an ArrayRCP for the internal storage for this block-entry:
01104   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01105   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01106 
01107   LocalOrdinal blk_size = blockEntry.size();
01108   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01109     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01110 
01111   //copy the incoming block-entry into internal storage:
01112   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01113 }
01114 
01115 //-------------------------------------------------------------------
01116 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01117 void
01118 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::setLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01119 {
01120   //first get an ArrayRCP for the internal storage for this block-entry:
01121   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01122   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01123 
01124   LocalOrdinal blk_size = blockEntry.size();
01125   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01126     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01127 
01128   //copy the incoming block-entry into internal storage:
01129   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, REPLACE);
01130 }
01131 
01132 //-------------------------------------------------------------------
01133 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01134 void
01135 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoGlobalBlockEntry(GlobalOrdinal globalBlockRow, GlobalOrdinal globalBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01136 {
01137   //first get an ArrayRCP for the internal storage for this block-entry:
01138   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01139   getGlobalBlockEntryViewNonConst(globalBlockRow,globalBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01140 
01141   LocalOrdinal blk_size = blockEntry.size();
01142   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01143     "Tpetra::VbrMatrix::setGlobalBlockEntry ERROR, inconsistent block-entry sizes.");
01144 
01145   //copy the incoming block-entry into internal storage:
01146   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01147 }
01148 
01149 //-------------------------------------------------------------------
01150 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01151 void
01152 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::sumIntoLocalBlockEntry(LocalOrdinal localBlockRow, LocalOrdinal localBlockCol, LocalOrdinal blkRowSize, LocalOrdinal blkColSize, LocalOrdinal LDA, const Teuchos::ArrayView<const Scalar>& blockEntry)
01153 {
01154   //first get an ArrayRCP for the internal storage for this block-entry:
01155   Teuchos::ArrayRCP<Scalar> internalBlockEntry;
01156   getLocalBlockEntryViewNonConst(localBlockRow,localBlockCol, blkRowSize, blkColSize, internalBlockEntry);
01157 
01158   LocalOrdinal blk_size = blockEntry.size();
01159   TEUCHOS_TEST_FOR_EXCEPTION(blkColSize*LDA > blk_size, std::runtime_error,
01160     "Tpetra::VbrMatrix::setLocalBlockEntry ERROR, inconsistent block-entry sizes.");
01161 
01162   //copy the incoming block-entry into internal storage:
01163   set_array_values(internalBlockEntry, blockEntry, blkRowSize, blkColSize, LDA, ADD);
01164 }
01165 
01166 //-------------------------------------------------------------------
01167 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01168 void
01169 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::optimizeStorage()
01170 {
01171   typedef Teuchos::ArrayRCP<const size_t> Host_View;
01172   typedef Teuchos::ArrayRCP<const LocalOrdinal> Host_View_LO;
01173   typedef typename Teuchos::ArrayRCP<size_t>::size_type Tsize_t;
01174 
01175   if (is_storage_optimized_ == true) return;
01176 
01177   TEUCHOS_TEST_FOR_EXCEPTION(constBlkGraph_->isFillComplete() != true, std::runtime_error,
01178     "Tpetra::VbrMatrix::optimizeStorage ERROR, isFillComplete() is false, required to be true before optimizeStorage is called.");
01179 
01180   size_t num_block_nonzeros = constBlkGraph_->getNodeNumBlockEntries();
01181 
01182   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& rowmap = constBlkGraph_->getBlockRowMap();
01183   const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& colmap = constBlkGraph_->getBlockColMap();
01184 
01185   const Teuchos::RCP<Node>& node = getBlockRowMap()->getPointMap()->getNode();
01186 
01187   //need to count the number of point-entries:
01188   size_t num_point_entries = 0;
01189   Host_View bptr = constBlkGraph_->getNodeRowOffsets();
01190 
01191   if (bptr.size() == 0) {
01192     is_storage_optimized_ = true;
01193     return;
01194   }
01195 
01196   Host_View_LO bindx = constBlkGraph_->getNodePackedIndices();
01197 
01198   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01199     size_t rbeg = bptr[r];
01200     size_t rend = bptr[r+1];
01201 
01202     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01203 
01204     for(size_t j=rbeg; j<rend; ++j) {
01205       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[j]);
01206       num_point_entries += rsize*csize;
01207     }
01208   }
01209 
01210   pbuf_indx_ = node->template allocBuffer<LocalOrdinal>(num_block_nonzeros+1);
01211   pbuf_values1D_ = node->template allocBuffer<Scalar>(num_point_entries);
01212 
01213   Teuchos::ArrayRCP<LocalOrdinal> view_indx = node->template viewBufferNonConst<LocalOrdinal>(KokkosClassic::WriteOnly, num_block_nonzeros+1, pbuf_indx_);
01214   Teuchos::ArrayRCP<Scalar> view_values1D = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly, num_point_entries, pbuf_values1D_);
01215 
01216   bool have_2D_data = data_2D_->size() > 0;
01217   Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
01218 
01219   size_t ioffset = 0;
01220   size_t offset = 0;
01221   for(Tsize_t r=0; r<bptr.size()-1; ++r) {
01222     LocalOrdinal rsize = rowmap->getLocalBlockSize(r);
01223 
01224     RowGlobalCols* blk_row = have_2D_data ? &((*data_2D_)[r]) : NULL;
01225 
01226     for(size_t c=bptr[r]; c<bptr[r+1]; ++c) {
01227       view_indx[ioffset++] = offset;
01228 
01229       LocalOrdinal csize = colmap->getLocalBlockSize(bindx[c]);
01230       Tsize_t blkSize = rsize*csize;
01231 
01232       if (blk_row != NULL) {
01233         GlobalOrdinal global_col = colmap->getGlobalBlockID(bindx[c]);
01234         typename RowGlobalCols::iterator iter = blk_row->find(global_col);
01235         TEUCHOS_TEST_FOR_EXCEPTION(iter == blk_row->end(), std::runtime_error,
01236           "Tpetra::VbrMatrix::optimizeStorage ERROR, global_col not found in row.");
01237 
01238         Teuchos::ArrayRCP<Scalar> vals = iter->second;
01239         for(Tsize_t n=0; n<blkSize; ++n) {
01240           view_values1D[offset+n] = vals[n];
01241         }
01242       }
01243       else {
01244         for(Tsize_t n=0; n<blkSize; ++n) view_values1D[offset+n] = zero;
01245       }
01246       offset += blkSize;
01247     }
01248   }
01249   view_indx[ioffset] = offset;
01250 
01251   //Final step: release memory for the "2D" storage:
01252   data_2D_ = Teuchos::null;
01253 
01254   is_storage_optimized_ = true;
01255 }
01256 
01257 //-------------------------------------------------------------------
01258 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01259 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatrix()
01260 {
01261   //We insist that optimzeStorage has already been called.
01262   //We don't care whether this function (fillLocalMatrix()) is being
01263   //called for the first time or not.
01264   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01265     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01266 
01267   Teuchos::ArrayRCP<const size_t      > nodeRowOffsets = constBlkGraph_->getNodeRowOffsets();
01268   Teuchos::ArrayRCP<const LocalOrdinal> nodePackedInds = constBlkGraph_->getNodePackedIndices();
01269   if (Node::isHostNode == false) {
01270     RCP<Node> node = getRangeMap()->getNode();
01271     //
01272     Teuchos::ArrayRCP<size_t> dev_nodeRowOffsets = node->template allocBuffer<size_t>(nodeRowOffsets.size());
01273     node->copyToBuffer(nodeRowOffsets.size(), nodeRowOffsets(), dev_nodeRowOffsets);
01274     nodeRowOffsets = dev_nodeRowOffsets;
01275     //
01276     Teuchos::ArrayRCP<LocalOrdinal> dev_nodePackedInds = node->template allocBuffer<LocalOrdinal>(nodePackedInds.size());
01277     node->copyToBuffer(nodePackedInds.size(), nodePackedInds(), dev_nodePackedInds);
01278     nodePackedInds = dev_nodePackedInds;
01279   }
01280   lclMatrix_->setPackedValues (pbuf_values1D_,
01281                                getBlockRowMap()->getNodeFirstPointInBlocks_Device(),
01282                                getBlockColMap()->getNodeFirstPointInBlocks_Device(),
01283                                nodeRowOffsets,
01284                                nodePackedInds,
01285                                pbuf_indx_);
01286 }
01287 
01288 //-------------------------------------------------------------------
01289 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01290 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillLocalMatVec()
01291 {
01292   //We insist that optimzeStorage has already been called.
01293   //We don't care whether this function (fillLocalMatVec()) is being
01294   //called for the first time or not.
01295   TEUCHOS_TEST_FOR_EXCEPTION(is_storage_optimized_ != true, std::runtime_error,
01296     "Tpetra::VbrMatrix::fillLocalMatrix ERROR, optimizeStorage is required to have already been called.");
01297 
01298   lclMatOps_.initializeValues (*lclMatrix_);
01299 }
01300 
01301 //-------------------------------------------------------------------
01302 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01303 void
01304 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::createImporterExporter()
01305 {
01306   typedef typename Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > PtMap;
01307 
01308   const PtMap& ptDomMap = (constBlkGraph_->getBlockDomainMap())->getPointMap();
01309   const PtMap& ptRngMap = (constBlkGraph_->getBlockRangeMap())->getPointMap();
01310   const PtMap& ptRowMap = (constBlkGraph_->getBlockRowMap())->getPointMap();
01311   const PtMap& ptColMap = (constBlkGraph_->getBlockColMap())->getPointMap();
01312 
01313   if (!ptDomMap->isSameAs(*ptColMap)) {
01314     importer_ = Teuchos::rcp(new Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node>(ptDomMap, ptColMap));
01315   }
01316   if (!ptRngMap->isSameAs(*ptRowMap)) {
01317     exporter_ = Teuchos::rcp(new Tpetra::Export<LocalOrdinal,GlobalOrdinal,Node>(ptRowMap, ptRngMap));
01318   }
01319 }
01320 
01321 //-------------------------------------------------------------------
01322 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01323 void
01324 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete(const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockDomainMap, const Teuchos::RCP<const BlockMap<LocalOrdinal,GlobalOrdinal,Node> >& blockRangeMap)
01325 {
01326   if (isFillComplete()) return;
01327 
01328   globalAssemble();
01329 
01330   blkGraph_->fillComplete(blockDomainMap, blockRangeMap);
01331 
01332   createImporterExporter();
01333 
01334   is_fill_completed_ = true;
01335 
01336   optimizeStorage();
01337 
01338   fillLocalMatrix();
01339   fillLocalMatVec();
01340 }
01341 
01342 //-------------------------------------------------------------------
01343 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01344 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::fillComplete()
01345 {
01346   //In this case, our block-row-map will also be our domain-map and
01347   //range-map.
01348 
01349   fillComplete(getBlockRowMap(), getBlockRowMap());
01350 }
01351 
01352 //-------------------------------------------------------------------
01353 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01354 void VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::globalAssemble()
01355 {
01356   if (getPointRowMap()->getComm()->getSize() == 1) {
01357     return;
01358   }
01359 
01360   //nonlocal_data_ contains data that belongs on other processors.
01361   //We'll refer to this as overlapping data, and create an 'overlapMap'
01362   //that describes the layout of this overlapping data.
01363   Teuchos::RCP<const Tpetra::BlockMap<LocalOrdinal,GlobalOrdinal,Node> >
01364      overlapMap = VbrUtils::createOverlapMap(nonlocal_data_, *getBlockRowMap());
01365 
01366   if (overlapMap->getGlobalNumBlocks() == 0) {
01367     return;
01368   }
01369 
01370   //VbrDataDist is a wrapper that makes our overlapping data behave
01371   //like a DistObject so that the overlapping data can be exported
01372   //to the owning processors in a standard way.
01373   typedef VbrUtils::VbrDataDist<LocalOrdinal, GlobalOrdinal, Scalar, Node> VDD;
01374   VDD vdd (nonlocal_data_, *overlapMap);
01375 
01376   //Create an exporter where the source map is our overlapMap and the
01377   //target map is the rowmap of our VbrMatrix.
01378   typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
01379   import_type importer (vdd.getMap (), convertBlockMapToPointMap (*getBlockRowMap ()));
01380 
01381   //Communicate the overlapping data to the owning processors and add it
01382   //into this VbrMatrix.
01383   this->doImport (vdd, importer, Tpetra::ADD);
01384 
01385   //Zero out the overlapping data so it can be re-populated and re-assembled
01386   //in future calls to globalAssemble.
01387   nonlocal_data_.zeroEntries ();
01388 }
01389 
01390 //-------------------------------------------------------------------
01391 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01392 std::string
01393 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::description() const
01394 {
01395   std::ostringstream oss;
01396   oss << Teuchos::Describable::description();
01397   if (isFillComplete()) {
01398     oss << "{status = fill complete, global num block rows = "
01399         << getBlockRowMap()->getGlobalNumBlocks() << "}";
01400   }
01401   else {
01402     oss << "{status = fill not complete, global num block rows = "
01403         << getBlockRowMap()->getGlobalNumBlocks() << "}";
01404   }
01405   return oss.str();
01406 }
01407 
01408 //-------------------------------------------------------------------
01409 template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
01410 void
01411 VbrMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
01412 {
01413   Teuchos::EVerbosityLevel vl = verbLevel;
01414   if (vl == Teuchos::VERB_DEFAULT) vl = Teuchos::VERB_LOW;
01415 
01416   if (vl == Teuchos::VERB_NONE) return;
01417 
01418   Teuchos::RCP<const Teuchos::Comm<int> > comm = getBlockRowMap()->getPointMap()->getComm();
01419   const int myProc = comm->getRank();
01420 
01421   if (myProc == 0) out << "VbrMatrix::describe is under construction..." << std::endl;
01422 }
01423 
01424 }//namespace Tpetra
01425 
01426 //
01427 // Explicit instantiation macro
01428 //
01429 // Must be expanded from within the Tpetra namespace!
01430 //
01431 
01432 #define TPETRA_VBRMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01433   \
01434   template class VbrMatrix< SCALAR , LO , GO , NODE >;
01435 
01436 #endif //TPETRA_VBRMATRIX_DEF_HPP
01437 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines