Tpetra_CrsMatrix.hpp

00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_CRSMATRIX_HPP
00030 #define TPETRA_CRSMATRIX_HPP
00031 
00032 // TODO: add support for alpha,beta term coefficients: Y = alpha*A*X + beta*Y
00033 // TODO: row-wise insertion of entries in globalAssemble() may be more efficient
00034 
00035 #include <Kokkos_DefaultNode.hpp>
00036 #include <Kokkos_CrsMatrix.hpp>
00037 #include <Kokkos_DefaultSparseMultiply.hpp>
00038 #include <Kokkos_DefaultSparseSolve.hpp>
00039 #include <Kokkos_NodeHelpers.hpp>
00040 
00041 #include <Teuchos_SerialDenseMatrix.hpp>
00042 #include <Teuchos_CommHelpers.hpp>
00043 #include <Teuchos_ScalarTraits.hpp>
00044 #include <Teuchos_OrdinalTraits.hpp>
00045 #include <Teuchos_VerboseObject.hpp>
00046 #include <Teuchos_TypeNameTraits.hpp>
00047 
00048 #include "Tpetra_InverseOperator.hpp"
00049 #include "Tpetra_RowMatrix.hpp"
00050 #include "Tpetra_CrsGraph.hpp"
00051 #include "Tpetra_Import.hpp"
00052 #include "Tpetra_Export.hpp"
00053 #include "Tpetra_MultiVector.hpp"
00054 #include "Tpetra_Vector.hpp"
00055 #include "Tpetra_NodeOps.hpp"
00056 
00057 namespace Tpetra {
00058   // struct for i,j,v triplets
00059   template <class Ordinal, class Scalar>
00060   struct CrsIJV {
00061     CrsIJV() {}
00062     CrsIJV(Ordinal row, Ordinal col, const Scalar &val) {
00063       i = row;
00064       j = col;
00065       v = val;
00066     }
00067     Ordinal i,j;
00068     Scalar  v;
00069   };
00070 }
00071 
00072 namespace Teuchos {
00073   // SerializationTraits specialization for CrsIJV, using DirectSerialization
00074   template<typename Ordinal, typename Scalar>
00075   class SerializationTraits<int,Tpetra::CrsIJV<Ordinal,Scalar> >
00076   : public DirectSerializationTraits<int,Tpetra::CrsIJV<Ordinal,Scalar> >
00077   {};
00078 }
00079 
00080 namespace std {
00081   template <class Ordinal, class Scalar>
00082   bool operator<(const Tpetra::CrsIJV<Ordinal,Scalar> &ijv1, const Tpetra::CrsIJV<Ordinal,Scalar> &ijv2) {
00083     return ijv1.i < ijv2.i;
00084   }
00085 }
00086 
00087 namespace Tpetra 
00088 {
00089 
00091 
00107   template<class Scalar, class LocalOrdinal = int, class GlobalOrdinal = LocalOrdinal, class Node = Kokkos::DefaultNode::DefaultNodeType, class LocalMatVec = Kokkos::DefaultSparseMultiply<Scalar,LocalOrdinal,Node>, class LocalMatSolve = Kokkos::DefaultSparseSolve<Scalar,LocalOrdinal,Node> >
00108   class CrsMatrix : public RowMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>, public InverseOperator<Scalar,LocalOrdinal,GlobalOrdinal,Node> {
00109     public:
00111 
00112 
00114       CrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, size_t maxNumEntriesPerRow, ProfileType pftype = DynamicProfile);
00115 
00117       CrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, const Teuchos::ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, ProfileType pftype = DynamicProfile);
00118 
00120       CrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &colMap, size_t maxNumEntriesPerRow, ProfileType pftype = DynamicProfile);
00121 
00123       CrsMatrix(const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &colMap, const Teuchos::ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, ProfileType pftype = DynamicProfile);
00124 
00126       explicit CrsMatrix(const Teuchos::RCP<CrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &graph);
00127 
00128       // !Destructor.
00129       virtual ~CrsMatrix();
00130 
00132 
00134 
00135 
00137       void insertGlobalValues(GlobalOrdinal globalRow, const Teuchos::ArrayView<const GlobalOrdinal> &cols, const Teuchos::ArrayView<const Scalar> &vals);
00138 
00140       void insertLocalValues(LocalOrdinal localRow, const Teuchos::ArrayView<const LocalOrdinal> &cols, const Teuchos::ArrayView<const Scalar> &vals);
00141 
00143 
00147       void replaceGlobalValues(GlobalOrdinal globalRow, 
00148                          const Teuchos::ArrayView<const GlobalOrdinal> &cols,
00149                          const Teuchos::ArrayView<const Scalar>        &vals);
00150 
00152 
00153       void sumIntoGlobalValues(GlobalOrdinal globalRow, 
00154                          const Teuchos::ArrayView<const GlobalOrdinal> &cols,
00155                          const Teuchos::ArrayView<const Scalar>        &vals);
00156 
00157 
00159       void setAllToScalar(const Scalar &alpha);
00160 
00162       void scale(const Scalar &alpha);
00163 
00165 
00167 
00168 
00170       void globalAssemble();
00171 
00176       void fillComplete(const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &domainMap, const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rangeMap, OptimizeOption os = DoOptimizeStorage);
00177 
00183       void fillComplete(OptimizeOption os = DoOptimizeStorage);
00184 
00186       void optimizeStorage();
00187 
00189 
00191 
00192 
00194       const Teuchos::RCP<const Teuchos::Comm<int> > & getComm() const;
00195 
00197       Teuchos::RCP<Node> getNode() const;
00198 
00200       const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getRowMap() const;
00201 
00203       const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getColMap() const;
00204 
00206       Teuchos::RCP<const RowGraph<LocalOrdinal,GlobalOrdinal,Node> > getGraph() const;
00207 
00209       Teuchos::RCP<const CrsGraph<LocalOrdinal,GlobalOrdinal> > getCrsGraph() const;
00210 
00212       global_size_t getGlobalNumRows() const;
00213 
00215       global_size_t getGlobalNumCols() const;
00216 
00218       size_t getNodeNumRows() const;
00219 
00221       size_t getNodeNumCols() const;
00222 
00224       GlobalOrdinal getIndexBase() const;
00225 
00227       global_size_t getGlobalNumEntries() const;
00228 
00230       size_t getNodeNumEntries() const;
00231 
00233 
00234       size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const;
00235 
00237 
00238       size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const;
00239 
00241       global_size_t getGlobalNumDiags() const;
00242 
00244       size_t getNodeNumDiags() const;
00245 
00247       size_t getGlobalMaxNumRowEntries() const;
00248 
00250       size_t getNodeMaxNumRowEntries() const;
00251 
00253       bool hasColMap() const; 
00254 
00256       bool isLowerTriangular() const;
00257 
00259       bool isUpperTriangular() const;
00260 
00262       bool isLocallyIndexed() const;
00263 
00265       bool isGloballyIndexed() const;
00266 
00268       bool isFillComplete() const;
00269 
00271 
00281       void getGlobalRowCopy(GlobalOrdinal GlobalRow,
00282                             const Teuchos::ArrayView<GlobalOrdinal> &Indices,
00283                             const Teuchos::ArrayView<Scalar> &Values,
00284                             size_t &NumEntries) const;
00285 
00287 
00297       void getLocalRowCopy(LocalOrdinal LocalRow, 
00298                            const Teuchos::ArrayView<LocalOrdinal> &Indices, 
00299                            const Teuchos::ArrayView<Scalar> &Values,
00300                            size_t &NumEntries) const;
00301 
00303 
00312       void getGlobalRowView(GlobalOrdinal GlobalRow, 
00313                             Teuchos::ArrayRCP<const GlobalOrdinal> &indices,
00314                             Teuchos::ArrayRCP<const Scalar>        &values) const;
00315 
00317 
00326       void getLocalRowView(LocalOrdinal LocalRow,
00327                            Teuchos::ArrayRCP<const LocalOrdinal> &indices,
00328                            Teuchos::ArrayRCP<const Scalar>       &values) const;
00329 
00331 
00333       void getLocalDiagCopy(Vector<Scalar,LocalOrdinal,GlobalOrdinal> &diag) const;
00334 
00336 
00338 
00339 
00341       template <class DomainScalar, class RangeScalar>
00342       void multiply(const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp trans) const;
00343 
00345       template <class DomainScalar, class RangeScalar>
00346       void solve(const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> & Y, MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X, Teuchos::ETransp trans) const;
00347           
00349 
00351 
00352 
00355       const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getDomainMap() const;
00356 
00359       const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & getRangeMap() const;
00360 
00362       void apply(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS) const;
00363 
00365       bool hasTransposeApply() const;
00366 
00368 
00370 
00371 
00373       void applyInverse(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & X, MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp mode = Teuchos::NO_TRANS) const;
00374 
00376       bool hasTransposeApplyInverse() const;
00377 
00379 
00381 
00382 
00384       bool isStorageOptimized() const;
00385 
00387       ProfileType getProfileType() const;
00388 
00390       bool isStaticGraph() const;
00391 
00393 
00395 
00396 
00398       std::string description() const;
00399 
00401       void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const;
00402 
00404 
00405     private:
00406       // copy constructor disabled
00407       CrsMatrix(const CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> &Source);
00408       // operator= disabled
00409       CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> & operator=(const CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> &rhs);
00410     protected:
00411 
00412       // useful typedefs
00413       typedef Teuchos::OrdinalTraits<LocalOrdinal>    LOT;
00414       typedef Teuchos::OrdinalTraits<GlobalOrdinal>   GOT;
00415       typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00416 
00417       void allocateValues(Scalar alpha = Teuchos::ScalarTraits<Scalar>::zero());
00418       void sortEntries();
00419       void mergeRedundantEntries();
00420       void checkInternalState() const;
00421       void updateAllocation(size_t lrow, size_t allocSize);
00422       void fillLocalMatrix();
00423 
00425 
00433       Teuchos::ArrayRCP<const Scalar> getFullView(size_t myRow, RowInfo sizeInfo) const;
00434 
00436 
00444       Teuchos::ArrayRCP<Scalar> getFullViewNonConst(size_t myRow, RowInfo sizeInfo);
00445 
00446       Teuchos::RCP<CrsGraph<LocalOrdinal,GlobalOrdinal> > graph_;
00447 
00448       Kokkos::CrsMatrix<Scalar,Node> lclMatrix_;
00449       LocalMatVec lclMatVec_;
00450       LocalMatSolve lclMatSolve_;
00451 
00452       bool valuesAreAllocated_,
00453            staticGraph_,
00454            constructedWithFilledGraph_,
00455            fillComplete_,
00456            storageOptimized_;
00457 
00458       // matrix values. before allocation, these are Teuchos::null.
00459       // after allocation, one is Teuchos::Null.
00460       // these are parallel compute buffers, not host memory. therefore, in general, they cannot be dereferenced in host cost.
00461       // 1D == StaticAllocation, 2D == DynamicAllocation
00462       // The allocation always matches that of graph_
00463       Teuchos::ArrayRCP<Scalar>                       pbuf_values1D_;
00464       Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >   pbuf_values2D_;
00465 
00466       // multivectors used for import/export dest/source in apply()
00467       mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importMV_, exportMV_;
00468 
00469       // a map between a (non-local) row and a list of (col,val)
00470       std::map<GlobalOrdinal, std::list<std::pair<GlobalOrdinal,Scalar> > > nonlocals_;
00471 
00472   }; // class CrsMatrix
00473 
00474 
00475 
00478 
00479   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00480   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrix(
00481                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, 
00482                                           size_t maxNumEntriesPerRow, 
00483                                           ProfileType pftype)
00484   : lclMatrix_(rowMap->getNodeNumElements(), rowMap->getNode())
00485   , lclMatVec_(rowMap->getNode())
00486   , lclMatSolve_(rowMap->getNode())
00487   , valuesAreAllocated_(false)
00488   , staticGraph_(false)
00489   , constructedWithFilledGraph_(false)
00490   , fillComplete_(false)
00491   , storageOptimized_(false) {
00492     try {
00493       graph_ = Teuchos::rcp( new Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node>(rowMap,maxNumEntriesPerRow,pftype) );
00494     }
00495     catch (std::exception &e) {
00496       TEST_FOR_EXCEPTION(true, std::runtime_error,
00497           Teuchos::typeName(*this) << "::CrsMatrix(): caught exception while allocating CrsGraph object: " 
00498           << std::endl << e.what() << std::endl);
00499     }
00500     checkInternalState();
00501   }
00502 
00503   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00504   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrix(
00505                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, 
00506                                           const Teuchos::ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, 
00507                                           ProfileType pftype)
00508   : lclMatrix_(rowMap->getNodeNumElements(), rowMap->getNode())
00509   , lclMatVec_(rowMap->getNode())
00510   , lclMatSolve_(rowMap->getNode())
00511   , valuesAreAllocated_(false)
00512   , staticGraph_(false)
00513   , constructedWithFilledGraph_(false)
00514   , fillComplete_(false)
00515   , storageOptimized_(false) {
00516     try {
00517       graph_ = Teuchos::rcp( new Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node>(rowMap,NumEntriesPerRowToAlloc,pftype) );
00518     }
00519     catch (std::exception &e) {
00520       TEST_FOR_EXCEPTION(true, std::runtime_error,
00521           Teuchos::typeName(*this) << "::CrsMatrix(): caught exception while allocating CrsGraph object: " 
00522           << std::endl << e.what() << std::endl);
00523     }
00524     checkInternalState();
00525   }
00526 
00527   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00528   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrix(
00529                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, 
00530                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &colMap, 
00531                                           size_t maxNumEntriesPerRow, 
00532                                           ProfileType pftype)
00533   : lclMatrix_(rowMap->getNodeNumElements(), rowMap->getNode())
00534   , lclMatVec_(rowMap->getNode())
00535   , lclMatSolve_(rowMap->getNode())
00536   , valuesAreAllocated_(false)
00537   , staticGraph_(false)
00538   , constructedWithFilledGraph_(false)
00539   , fillComplete_(false)
00540   , storageOptimized_(false) {
00541     try {
00542       graph_ = Teuchos::rcp( new Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node>(rowMap,colMap,maxNumEntriesPerRow,pftype) );
00543     }
00544     catch (std::exception &e) {
00545       TEST_FOR_EXCEPTION(true, std::runtime_error,
00546           Teuchos::typeName(*this) << "::CrsMatrix(): caught exception while allocating CrsGraph object: " 
00547           << std::endl << e.what() << std::endl);
00548     }
00549     checkInternalState();
00550   }
00551 
00552   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00553   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrix(
00554                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rowMap, 
00555                                           const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &colMap, 
00556                                           const Teuchos::ArrayRCP<const size_t> &NumEntriesPerRowToAlloc, 
00557                                           ProfileType pftype)
00558   : lclMatrix_(rowMap->getNodeNumElements(), rowMap->getNode())
00559   , lclMatVec_(rowMap->getNode())
00560   , lclMatSolve_(rowMap->getNode())
00561   , valuesAreAllocated_(false)
00562   , staticGraph_(false)
00563   , constructedWithFilledGraph_(false)
00564   , fillComplete_(false)
00565   , storageOptimized_(false) {
00566     try {
00567       graph_ = Teuchos::rcp( new Tpetra::CrsGraph<LocalOrdinal,GlobalOrdinal,Node>(rowMap,NumEntriesPerRowToAlloc,pftype) );
00568     }
00569     catch (std::exception &e) {
00570       TEST_FOR_EXCEPTION(true, std::runtime_error,
00571           Teuchos::typeName(*this) << "::CrsMatrix(): caught exception while allocating CrsGraph object: " 
00572           << std::endl << e.what() << std::endl);
00573     }
00574     checkInternalState();
00575   }
00576 
00577   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00578   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrix(const Teuchos::RCP< CrsGraph<LocalOrdinal,GlobalOrdinal,Node> > &graph)
00579   : graph_(graph)
00580   , lclMatrix_(graph->getRowMap()->getNodeNumElements(), graph->getRowMap()->getNode())
00581   , lclMatVec_(graph->getNode())
00582   , lclMatSolve_(graph->getNode())
00583   , valuesAreAllocated_(false)
00584   , staticGraph_(true)
00585   , fillComplete_(graph->isFillComplete())
00586   , storageOptimized_(graph->isStorageOptimized()) {
00587     TEST_FOR_EXCEPTION(graph_ == Teuchos::null, std::runtime_error,
00588         Teuchos::typeName(*this) << "::CrsMatrix(graph): specified pointer is null.");
00589     // we won't prohibit the case where the graph is not yet filled, but we will check below to ensure that the
00590     // graph isn't filled between now and when fillComplete() is called on this matrix.
00591     // the user is not allowed to modify the graph. this check is about the most that we can do to check whether they have.
00592     constructedWithFilledGraph_ = graph_->isFillComplete();
00593     checkInternalState();
00594   }
00595 
00596 
00597   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00598   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::~CrsMatrix() {
00599   }
00600 
00601 
00602   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00603   const Teuchos::RCP<const Teuchos::Comm<int> > &
00604   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getComm() const {
00605     return graph_->getComm();
00606   }
00607 
00608 
00609   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00610   Teuchos::RCP<Node>
00611   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNode() const {
00612     return lclMatVec_.getNode();
00613   }
00614 
00615 
00616   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00617   ProfileType CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getProfileType() const {
00618     return graph_->getProfileType();
00619   }
00620 
00621 
00622   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00623   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isFillComplete() const {
00624     return fillComplete_; 
00625   }
00626 
00627 
00628   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00629   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isStorageOptimized() const {
00630     return storageOptimized_; 
00631   }
00632 
00633 
00634   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00635   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isLocallyIndexed() const {
00636     return graph_->isLocallyIndexed();
00637   }
00638 
00639 
00640   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00641   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isGloballyIndexed() const {
00642     return graph_->isGloballyIndexed();
00643   }
00644 
00645 
00646   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00647   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::hasColMap() const {
00648     return graph_->hasColMap();
00649   }
00650 
00651 
00652   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00653   global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalNumEntries() const {
00654     return graph_->getGlobalNumEntries();
00655   }
00656 
00657 
00658   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00659   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNodeNumEntries() const {
00660     return graph_->getNodeNumEntries();
00661   }
00662 
00663 
00664   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00665   global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalNumRows() const {
00666     return graph_->getGlobalNumRows(); 
00667   }
00668 
00669 
00670   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00671   global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalNumCols() const { 
00672     return graph_->getGlobalNumCols(); 
00673   }
00674 
00675 
00676   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00677   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNodeNumRows() const { 
00678     return graph_->getNodeNumRows(); 
00679   }
00680 
00681 
00682   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00683   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNodeNumCols() const { 
00684     return graph_->getNodeNumCols(); 
00685   }
00686 
00687 
00688   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00689   global_size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalNumDiags() const { 
00690     return graph_->getGlobalNumDiags(); 
00691   }
00692 
00693 
00694   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00695   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNodeNumDiags() const { 
00696     return graph_->getNodeNumDiags(); 
00697   }
00698 
00699 
00700   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00701   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { 
00702     return graph_->getNumEntriesInGlobalRow(globalRow); 
00703   }
00704 
00705 
00706   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00707   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNumEntriesInLocalRow(LocalOrdinal localRow) const { 
00708     return graph_->getNumEntriesInLocalRow(localRow);
00709   }
00710 
00711 
00712   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00713   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalMaxNumRowEntries() const { 
00714     return graph_->getGlobalMaxNumRowEntries(); 
00715   }
00716 
00717 
00718   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00719   size_t CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getNodeMaxNumRowEntries() const { 
00720     return graph_->getNodeMaxNumRowEntries(); 
00721   }
00722 
00723 
00724   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00725   GlobalOrdinal CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getIndexBase() const { 
00726     return getRowMap()->getIndexBase(); 
00727   }
00728 
00729 
00730   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00731   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00732   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getRowMap() const { 
00733     return graph_->getRowMap(); 
00734   }
00735 
00736 
00737   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00738   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00739   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getColMap() const {
00740     return graph_->getColMap(); 
00741   }
00742 
00743 
00744   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00745   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00746   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getDomainMap() const { 
00747     return graph_->getDomainMap(); 
00748   }
00749 
00750 
00751   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00752   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00753   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getRangeMap() const { 
00754     return graph_->getRangeMap(); 
00755   }
00756 
00757 
00758   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00759   Teuchos::RCP<const RowGraph<LocalOrdinal,GlobalOrdinal,Node> >
00760   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGraph() const { 
00761     return graph_; 
00762   }
00763 
00764 
00765   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00766   Teuchos::RCP<const CrsGraph<LocalOrdinal,GlobalOrdinal> >
00767   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getCrsGraph() const { 
00768     return graph_; 
00769   }
00770 
00771 
00772   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00773   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isLowerTriangular() const { 
00774     return graph_->isLowerTriangular(); 
00775   }
00776 
00777 
00778   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00779   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isUpperTriangular() const { 
00780     return graph_->isUpperTriangular(); 
00781   }
00782 
00783 
00784   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00785   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::isStaticGraph() const { 
00786     return staticGraph_; 
00787   }
00788 
00789 
00790   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00791   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::hasTransposeApply() const {
00792     return true;
00793   }
00794 
00795   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00796   bool CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::hasTransposeApplyInverse() const {
00797     return true;
00798   }
00799       
00800 
00803   //                                                                         //
00804   //                    Internal utility methods                             //
00805   //                                                                         //
00808 
00809 
00812   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00813   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::allocateValues(Scalar alpha) {
00814 #ifdef HAVE_TPETRA_DEBUG
00815     // need graph_->getNodeAllocationSize(), but it is only available after the graph indices are allocated. we will 
00816     // internally enforce that the graph is allocated before allocateValues() is called. this test serves to make sure
00817     // that we have done so.
00818     TEST_FOR_EXCEPTION( graph_->indicesAreAllocated() == false, std::logic_error,
00819         Teuchos::typeName(*this) << "::allocateValues(): Internal logic error. Please contact Tpetra team.");
00820 #endif
00821     const size_t nlrs = getRowMap()->getNodeNumElements(),
00822                   nta = graph_->getNodeAllocationSize(),
00823            numEntries = graph_->getNodeNumEntries();
00824     if (valuesAreAllocated_) {
00825       return;
00826     }
00827     // do we even have anything to allocate?
00828     if (nta > 0) {
00829       Teuchos::RCP<Node> node = lclMatrix_.getNode();
00830       Kokkos::ReadyBufferHelper<Node> rbh(node);
00832       if (getProfileType() == StaticProfile) {
00833         // allocate values
00834         pbuf_values1D_ = node->template allocBuffer<Scalar>(nta);
00835         // init values in parallel, if the graph already has valid entries
00836         if (numEntries > 0) {
00837           InitOp<Scalar> wdp;
00838           wdp.val = alpha;
00839           rbh.begin();
00840           wdp.x   = rbh.addNonConstBuffer(pbuf_values1D_);
00841           rbh.end();
00842           node->template parallel_for<InitOp<Scalar> >(0,nta,wdp);
00843         }
00844       }
00846       else { // if getProfileType() == DynamicProfile
00847         InitOp<Scalar> wdp;
00848         wdp.val = alpha;
00849         // allocate array of buffers
00850         pbuf_values2D_ = Teuchos::arcp< Teuchos::ArrayRCP<Scalar> >(nlrs);
00851         for (size_t r=0; r<nlrs; ++r) {
00852           // this call to getNumAllocatedEntries() is cheap for the DynamicProfile case
00853           const size_t ntarow = graph_->getNumAllocatedEntriesInLocalRow(r),
00854                         nErow = graph_->getNumEntriesInLocalRow(r);
00855           if (ntarow > 0) {
00856             // allocate values for this row
00857             pbuf_values2D_[r] = node->template allocBuffer<Scalar>(ntarow);
00858             // initi values in parallel, if the graph already has valid entries
00859             if (nErow > 0) {
00860               rbh.begin();
00861               wdp.x   = rbh.addNonConstBuffer(pbuf_values2D_[r]);
00862               rbh.end();
00863               node->template parallel_for<InitOp<Scalar> >(0,ntarow,wdp);
00864             }
00865           }
00866         }
00867       }
00868     }
00869     valuesAreAllocated_ = true;
00870   }
00871 
00872 
00875   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00876   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::checkInternalState() const {
00877 #ifdef HAVE_TPETRA_DEBUG
00878     Teuchos::RCP<Node> node = getNode();
00879     using Teuchos::null;
00880     std::string err = Teuchos::typeName(*this) + "::checkInternalState(): Likely internal logic error. Please contact Tpetra team.";
00881     // check the internal state of this data structure
00882     // this is called by numerous state-changing methods, in a debug build, to ensure that the object 
00883     // always remains in a valid state
00884 
00885     // constructedWithFilledGraph_ should only be true if matrix was constructed with a graph, in which case staticGraph_ should be true
00886     TEST_FOR_EXCEPTION( staticGraph_ == false && constructedWithFilledGraph_ == true,                  std::logic_error, err ); 
00887     // matrix values cannot be allocated without the graph indices being allocated
00888     TEST_FOR_EXCEPTION( valuesAreAllocated_ == true && graph_->indicesAreAllocated() == false,         std::logic_error, err );
00889     // if matrix is fill complete, then graph must be fill complete
00890     TEST_FOR_EXCEPTION( fillComplete_ == true && graph_->isFillComplete() == false,                    std::logic_error, err );
00891     // if matrix is storage optimized, then graph must be storage optimizied
00892     TEST_FOR_EXCEPTION( storageOptimized_   != graph_->isStorageOptimized(),                           std::logic_error, err );
00893     // if matrix is storage optimized, it must have been fill completed
00894     TEST_FOR_EXCEPTION( storageOptimized_ == true && fillComplete_ == false,                           std::logic_error, err );
00895     // if matrix is storage optimized, it should have a 1D allocation 
00896     TEST_FOR_EXCEPTION( storageOptimized_ == true && pbuf_values2D_ != Teuchos::null,                  std::logic_error, err );
00897     // if matrix/graph are static profile, then 2D allocation should not be present
00898     TEST_FOR_EXCEPTION( graph_->getProfileType() == StaticProfile  && pbuf_values2D_ != Teuchos::null, std::logic_error, err );
00899     // if matrix/graph are dynamic profile, then 1D allocation should not be present
00900     TEST_FOR_EXCEPTION( graph_->getProfileType() == DynamicProfile && pbuf_values1D_ != Teuchos::null, std::logic_error, err );
00901     // if values are allocated and they are non-zero in number, then one of the allocations should be present
00902     TEST_FOR_EXCEPTION( valuesAreAllocated_ == true && graph_->getNodeAllocationSize() && pbuf_values2D_ == Teuchos::null && pbuf_values1D_ == Teuchos::null,
00903                         std::logic_error, err );
00904     // we can nae have both a 1D and 2D allocation
00905     TEST_FOR_EXCEPTION( pbuf_values1D_ != Teuchos::null && pbuf_values2D_ != Teuchos::null, std::logic_error, err );
00906     // compare matrix allocations against graph allocations
00907     if (valuesAreAllocated_ && graph_->getNodeAllocationSize() > 0) {
00908       if (graph_->getProfileType() == StaticProfile) {
00909         if (graph_->isLocallyIndexed()) {
00910           TEST_FOR_EXCEPTION( pbuf_values1D_.size() != graph_->pbuf_lclInds1D_.size(),  std::logic_error, err );
00911         }
00912         else {
00913           TEST_FOR_EXCEPTION( pbuf_values1D_.size() != graph_->pbuf_gblInds1D_.size(),  std::logic_error, err );
00914         }
00915       } 
00916       else { // graph_->getProfileType() == DynamicProfile
00917         if (graph_->isLocallyIndexed()) {
00918           for (size_t r=0; r < getNodeNumRows(); ++r) {
00919             TEST_FOR_EXCEPTION( (pbuf_values2D_[r] == Teuchos::null) != (graph_->pbuf_lclInds2D_[r] == Teuchos::null), std::logic_error, err );
00920             if (pbuf_values2D_[r] != Teuchos::null && graph_->pbuf_lclInds2D_[r] != Teuchos::null) {
00921               TEST_FOR_EXCEPTION( pbuf_values2D_[r].size() != graph_->pbuf_lclInds2D_[r].size(), std::logic_error, err );
00922             }
00923           }
00924         }
00925         else {
00926           for (size_t r=0; r < getNodeNumRows(); ++r) {
00927             TEST_FOR_EXCEPTION( (pbuf_values2D_[r] == Teuchos::null) != (graph_->pbuf_gblInds2D_[r] == Teuchos::null), std::logic_error, err );
00928             if (pbuf_values2D_[r] != Teuchos::null && graph_->pbuf_gblInds2D_[r] != Teuchos::null) {
00929               TEST_FOR_EXCEPTION( pbuf_values2D_[r].size() != graph_->pbuf_gblInds2D_[r].size(), std::logic_error, err );
00930             }
00931           }
00932         }
00933       }
00934     }
00935 #endif
00936   }
00937 
00938 
00941   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00942   Teuchos::ArrayRCP<const Scalar> 
00943   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getFullView(size_t myRow, RowInfo sizeInfo) const {
00944 #ifdef HAVE_TPETRA_DEBUG
00945     std::string err = Teuchos::typeName(*this) + "::getFullView(): Internal logic error. Please contact Tpetra team.";
00946     TEST_FOR_EXCEPTION(getRowMap()->isNodeLocalElement(myRow) == false, std::logic_error, err);
00947 #endif
00948     Teuchos::ArrayRCP<const Scalar> values = Teuchos::null;
00949     // sizeInfo indicates the allocation size for this row, whether it has actually been allocated or not
00950     if (sizeInfo.allocSize > 0 && valuesAreAllocated_) {
00951       Teuchos::RCP<Node> node = getNode();
00952       if (graph_->getProfileType() == StaticProfile) {
00953         values = node->template viewBuffer<Scalar>(sizeInfo.allocSize, pbuf_values1D_ + sizeInfo.offset1D);
00954       }
00955       else {  // dynamic profile
00956         values = node->template viewBuffer<Scalar>(sizeInfo.allocSize, pbuf_values2D_[myRow]);
00957       }
00958     }
00959 #ifdef HAVE_TPETRA_DEBUG
00960     TEST_FOR_EXCEPTION(valuesAreAllocated_ == true && values != Teuchos::null && static_cast<size_t>(values.size()) != sizeInfo.allocSize, std::logic_error, err);
00961 #endif
00962     return values;
00963   }
00964 
00965 
00968   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00969   Teuchos::ArrayRCP<Scalar> 
00970   CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getFullViewNonConst(size_t myRow, RowInfo sizeInfo) {
00971 #ifdef HAVE_TPETRA_DEBUG
00972     std::string err = Teuchos::typeName(*this) + "::getFullViewNonConst(): Internal logic error. Please contact Tpetra team.";
00973     TEST_FOR_EXCEPTION(getRowMap()->isNodeLocalElement(myRow) == false, std::logic_error, err);
00974 #endif
00975     Teuchos::ArrayRCP<Scalar> values = Teuchos::null;
00976     // sizeInfo indicates the allocation size for this row, whether it has actually been allocated or not
00977     if (sizeInfo.allocSize > 0 && valuesAreAllocated_) {
00978       Teuchos::RCP<Node> node = getNode();
00979       // if there are no valid entries, then this view can be constructed WriteOnly
00980       Kokkos::ReadWriteOption rw = (sizeInfo.numEntries == 0 ? Kokkos::WriteOnly : Kokkos::ReadWrite);
00981       if (graph_->getProfileType() == StaticProfile) {
00982         values = node->template viewBufferNonConst<Scalar>(rw, sizeInfo.allocSize, pbuf_values1D_ + sizeInfo.offset1D);
00983       }
00984       else {  // dynamic profile
00985         values = node->template viewBufferNonConst<Scalar>(rw, sizeInfo.allocSize, pbuf_values2D_[myRow]);
00986       }
00987     }
00988 #ifdef HAVE_TPETRA_DEBUG
00989     TEST_FOR_EXCEPTION(valuesAreAllocated_ == true && values != Teuchos::null && static_cast<size_t>(values.size()) != sizeInfo.allocSize, std::logic_error, err);
00990 #endif
00991     return values;
00992   }
00993 
00994 
00997   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00998   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::updateAllocation(size_t lrow, size_t allocSize) {
00999     using Teuchos::ArrayRCP;
01000     Teuchos::RCP<Node> node = getNode();
01001     RowInfo sizeInfo = graph_->getRowInfo(lrow);
01002     if (pbuf_values2D_ == Teuchos::null) {
01003       pbuf_values2D_ = Teuchos::arcp< ArrayRCP<Scalar> >(getNodeNumRows());
01004     }
01005 #ifdef HAVE_TPETRA_DEBUG
01006     TEST_FOR_EXCEPT( getRowMap()->isNodeLocalElement(lrow) == false );
01007     TEST_FOR_EXCEPT( pbuf_values2D_[lrow] != Teuchos::null && allocSize < static_cast<size_t>(pbuf_values2D_[lrow].size()) );
01008     TEST_FOR_EXCEPT( allocSize == 0 );
01009 #endif
01010     ArrayRCP<Scalar> old_row, new_row;
01011     old_row = pbuf_values2D_[lrow];
01012     new_row = node->template allocBuffer<Scalar>(allocSize);
01013     if (sizeInfo.numEntries) {
01014       node->template copyBuffers<Scalar>(sizeInfo.numEntries,old_row,new_row);
01015     }
01016     old_row = Teuchos::null;
01017     pbuf_values2D_[lrow] = new_row;
01018   }
01019 
01020 
01023   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01024   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::fillLocalMatrix() {
01025     lclMatrix_.clear();
01026     if (storageOptimized_) {
01027       // fill packed matrix; it is okay for pbuf_values1D_ to be null; the matrix will flag itself as empty
01028       lclMatrix_.setPackedValues(pbuf_values1D_);
01029     }
01030     else if (graph_->getProfileType() == StaticProfile) {
01031       if (pbuf_values1D_ != Teuchos::null) {
01032         const size_t nlrs = getNodeNumRows();
01033         for (size_t r=0; r < nlrs; ++r) {
01034           RowInfo sizeInfo = graph_->getRowInfo(r);
01035           Teuchos::ArrayRCP<const Scalar> rowvals;
01036           if (sizeInfo.numEntries > 0) {
01037             rowvals = pbuf_values1D_.persistingView(sizeInfo.offset1D, sizeInfo.numEntries);
01038             lclMatrix_.set2DValues(r,rowvals);
01039           }
01040         }
01041       }
01042     }
01043     else if (graph_->getProfileType() == DynamicProfile) {
01044       if (pbuf_values2D_ != Teuchos::null) {
01045         const size_t nlrs = getNodeNumRows();
01046         for (size_t r=0; r < nlrs; ++r) {
01047           RowInfo sizeInfo = graph_->getRowInfo(r);
01048           Teuchos::ArrayRCP<const Scalar> rowvals = pbuf_values2D_[r];
01049           if (sizeInfo.numEntries > 0) {
01050             rowvals = rowvals.persistingView(0,sizeInfo.numEntries);
01051             lclMatrix_.set2DValues(r,rowvals);
01052           }
01053         }
01054       }
01055     }
01056 
01057     // submit local matrix and local graph to lclMatVec_ and lclMatSolve_
01058     // lclMatVec_ and lclMatSolve_ are permitted to view, but we don't care whether they do or not
01059     lclMatVec_.clear();
01060     lclMatSolve_.clear();
01061     Teuchos::DataAccess ret;
01062     ret = lclMatVec_.initializeStructure( graph_->lclGraph_, Teuchos::View );
01063     ret = lclMatVec_.initializeValues( lclMatrix_, Teuchos::View );
01064     if (isLowerTriangular() || isUpperTriangular()) {
01065       ret = lclMatSolve_.initializeStructure( graph_->lclGraph_, Teuchos::View );
01066       ret = lclMatSolve_.initializeValues( lclMatrix_, Teuchos::View );
01067     }
01068   }
01069 
01070 
01073   //                                                                         //
01074   //                  User-visible class methods                             //
01075   //                                                                         //
01078 
01079 
01082   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01083   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::insertLocalValues(
01084                                                          LocalOrdinal localRow, 
01085                          const Teuchos::ArrayView<const LocalOrdinal> &indices,
01086                          const Teuchos::ArrayView<const Scalar>       &values) {
01087     using Teuchos::ArrayRCP;
01088     TEST_FOR_EXCEPTION(isStorageOptimized() == true, std::runtime_error,
01089         Teuchos::typeName(*this) << "::insertLocalValues(): cannot insert new values after optimizeStorage() has been called.");
01090     TEST_FOR_EXCEPTION(graph_->isGloballyIndexed() == true, std::runtime_error,
01091         Teuchos::typeName(*this) << "::insertLocalValues(): graph indices are global; use insertGlobalValues().");
01092     TEST_FOR_EXCEPTION(hasColMap() == false, std::runtime_error,
01093         Teuchos::typeName(*this) << "::insertLocalValues(): cannot insert local indices without a column map; ");
01094     TEST_FOR_EXCEPTION(isStaticGraph() == true, std::runtime_error,
01095         Teuchos::typeName(*this) << "::insertLocalValues(): matrix was constructed with static graph; cannot insert new entries.");
01096     TEST_FOR_EXCEPTION(values.size() != indices.size(), std::runtime_error,
01097         Teuchos::typeName(*this) << "::insertLocalValues(): values.size() must equal indices.size().");
01098     TEST_FOR_EXCEPTION(getRowMap()->isNodeLocalElement(localRow) == false, std::runtime_error,
01099         Teuchos::typeName(*this) << "::insertLocalValues(): row does not belong to this node.");
01100     Teuchos::Array<LocalOrdinal> finds;
01101     Teuchos::Array<Scalar>       fvals;
01102     finds.reserve(indices.size());
01103     fvals.reserve(values.size());
01104     // use column map to filter the entries:
01105     const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> &cmap = *getColMap();
01106     for (size_t i=0; i < static_cast<size_t>(indices.size()); ++i) {
01107       if (cmap.isNodeLocalElement(indices[i])) {
01108         finds.push_back(indices[i]);
01109         fvals.push_back(values[i]);
01110       }
01111     }
01112     if (finds.size() > 0) {
01113       if (valuesAreAllocated_ == false) {
01114         // allocate graph, so we can access views below
01115         if (graph_->indicesAreAllocated() == false || graph_->indicesAreAllocated() == false) {
01116           graph_->allocateIndices( CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::AllocateLocal );
01117         }
01118         // graph must be allocated before matrix, because matrix needs to know the total allocation of the graph
01119         allocateValues();
01120 #ifdef HAVE_TPETRA_DEBUG
01121         TEST_FOR_EXCEPTION(valuesAreAllocated_ == false, std::logic_error, 
01122             Teuchos::typeName(*this) << "::insertLocalValues(): Internal logic error. Please contact Tpetra team.");
01123 #endif
01124       }
01125       //
01126       ArrayRCP<LocalOrdinal> rowindsview;
01127       ArrayRCP<Scalar>       rowvalsview;
01128       RowInfo sizeInfo = graph_->getFullLocalViewNonConst(localRow, rowindsview);
01129       rowvalsview = getFullViewNonConst(localRow, sizeInfo);
01130       const size_t newSize = sizeInfo.numEntries + finds.size();
01131       if (newSize > sizeInfo.allocSize) {
01132         TEST_FOR_EXCEPTION(graph_->getProfileType() == StaticProfile, std::runtime_error,
01133             Teuchos::typeName(*this) << "::insertLocalValues(): new indices exceed statically allocated graph structure.");
01134         TPETRA_EFFICIENCY_WARNING(true,std::runtime_error,
01135             "::insertLocalValues(): Pre-allocated space has been exceeded, requiring new allocation. To improve efficiency, suggest larger allocation.");
01136         // update allocation only as much as necessary
01137         graph_->updateLocalAllocation(localRow,newSize);
01138         updateAllocation(localRow,newSize);
01139         // get new views; inefficient, but acceptible in this already inefficient case
01140         sizeInfo = graph_->getFullLocalViewNonConst(localRow, rowindsview);
01141         rowvalsview = getFullViewNonConst(localRow, sizeInfo);
01142       }
01143       // add new indices, values to graph and matrix rows
01144       graph_->insertLocalIndicesViaView( localRow, finds, rowindsview + sizeInfo.numEntries );
01145       std::copy( fvals.begin(), fvals.end(), rowvalsview + sizeInfo.numEntries);
01146 #ifdef HAVE_TPETRA_DEBUG
01147       {
01148         RowInfo sizeInfoNew = graph_->getRowInfo(localRow);
01149         TEST_FOR_EXCEPTION(sizeInfoNew.numEntries != newSize, std::logic_error,
01150             Teuchos::typeName(*this) << "::insertLocalValues(): Internal logic error. Please contact Tpetra team.");
01151       }
01152 #endif
01153       // 
01154       rowindsview = Teuchos::null;
01155       rowvalsview = Teuchos::null;
01156       // checkInternalState();
01157     }
01158   }
01159 
01160 
01163   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01164   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::insertGlobalValues(GlobalOrdinal globalRow, 
01165                          const Teuchos::ArrayView<const GlobalOrdinal> &indices,
01166                          const Teuchos::ArrayView<const Scalar>        &values) {
01167     using Teuchos::ArrayRCP;
01168     TEST_FOR_EXCEPTION(graph_->isLocallyIndexed() == true, std::runtime_error,
01169         Teuchos::typeName(*this) << "::insertGlobalValues(): graph indices are local; use insertLocalValues().");
01170     TEST_FOR_EXCEPTION(isStaticGraph() == true, std::runtime_error,
01171         Teuchos::typeName(*this) << "::insertGlobalValues(): matrix was constructed with static graph. Cannot insert new entries.");
01172     TEST_FOR_EXCEPTION(values.size() != indices.size(), std::runtime_error,
01173         Teuchos::typeName(*this) << "::insertGlobalValues(): values.size() must equal indices.size().");
01174     const LocalOrdinal myRow = getRowMap()->getLocalElement(globalRow);
01175     if (myRow != LOT::invalid()) {
01176       // if we have a column map, use it to filter the entries.
01177       // only filter if this is our row.
01178       Teuchos::Array<GlobalOrdinal> finds_is_temporary;
01179       Teuchos::Array<Scalar>        fvals_is_temporary;
01180       Teuchos::ArrayView<const GlobalOrdinal> findices = indices;
01181       Teuchos::ArrayView<const Scalar       > fvalues  = values;
01182       if (hasColMap()) {
01183         // filter indices and values through the column map
01184         finds_is_temporary.reserve(indices.size());
01185         fvals_is_temporary.reserve(values.size());
01186         const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> &cmap = *getColMap();
01187         for (size_t i=0; i< static_cast<size_t>(indices.size()); ++i) {
01188           if (cmap.isNodeGlobalElement(indices[i])) {
01189             finds_is_temporary.push_back(indices[i]);
01190             fvals_is_temporary.push_back(values[i]);
01191           }
01192         }
01193         findices = finds_is_temporary();
01194         fvalues  = fvals_is_temporary();
01195       }
01196       // add the new indices and values
01197       if (findices.size() > 0) {
01198         if (valuesAreAllocated_ == false) {
01199           // allocate graph, so we can access views below
01200           if (graph_->indicesAreAllocated() == false) {
01201             graph_->allocateIndices( CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::AllocateGlobal );
01202           }
01203           // graph must be allocated before matrix, because matrix needs to know the total allocation of the graph
01204           allocateValues();
01205 #ifdef HAVE_TPETRA_DEBUG
01206           TEST_FOR_EXCEPTION(valuesAreAllocated_ == false || graph_->indicesAreAllocated() == false, std::logic_error, 
01207               Teuchos::typeName(*this) << "::insertGlobalValues(): Internal logic error. Please contact Tpetra team.");
01208 #endif
01209         }
01210         // 
01211         ArrayRCP<GlobalOrdinal> rowindsview;
01212         ArrayRCP<Scalar>        rowvalsview;
01213         RowInfo sizeInfo = graph_->getFullGlobalViewNonConst(myRow, rowindsview);
01214         rowvalsview = getFullViewNonConst(myRow, sizeInfo);
01215         const size_t newSize = sizeInfo.numEntries + findices.size();
01216         if (newSize > sizeInfo.allocSize) {
01217           TEST_FOR_EXCEPTION(graph_->getProfileType() == StaticProfile, std::runtime_error,
01218               Teuchos::typeName(*this) << "::insertGlobalValues(): new indices exceed statically allocated graph structure.");
01219           TPETRA_EFFICIENCY_WARNING(true,std::runtime_error,
01220               "::insertGlobalValues(): Pre-allocated space has been exceeded, requiring new allocation. To improve efficiency, suggest larger allocation.");
01221           // update allocation only as much as necessary
01222           graph_->updateGlobalAllocation(myRow,newSize);
01223           updateAllocation(myRow,newSize);
01224           // get new views; inefficient, but acceptible in this already inefficient case
01225           sizeInfo = graph_->getFullGlobalViewNonConst(myRow, rowindsview);
01226           rowvalsview = getFullViewNonConst(myRow, sizeInfo);
01227         }
01228         // add new indices, values to graph and matrix rows
01229         graph_->insertGlobalIndicesViaView( myRow, findices, rowindsview + sizeInfo.numEntries );
01230         std::copy(  fvalues.begin(),  fvalues.end(), rowvalsview + sizeInfo.numEntries );
01231 #ifdef HAVE_TPETRA_DEBUG
01232         {
01233           RowInfo sizeInfoNew = graph_->getRowInfo(myRow);
01234           TEST_FOR_EXCEPTION(sizeInfoNew.numEntries != newSize, std::logic_error,
01235               Teuchos::typeName(*this) << "::insertGlobalValues(): Internal logic error. Please contact Tpetra team.");
01236         }
01237 #endif
01238         // 
01239         rowindsview = Teuchos::null;
01240         rowvalsview = Teuchos::null;
01241         // checkInternalState();
01242       }
01243     }
01244     else {
01245       typename Teuchos::ArrayView<const GlobalOrdinal>::iterator ind = indices.begin();
01246       typename Teuchos::ArrayView<const Scalar       >::iterator val =  values.begin();
01247       for (; val != values.end(); ++val, ++ind) {
01248         nonlocals_[globalRow].push_back(std::make_pair(*ind, *val));
01249       }
01250     }
01251   }
01252 
01253 
01256   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01257   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::replaceGlobalValues(      
01258                                         GlobalOrdinal globalRow, 
01259                                         const Teuchos::ArrayView<const GlobalOrdinal> &indices,
01260                                         const Teuchos::ArrayView<const Scalar>        &values) {
01261     using Teuchos::ArrayRCP;
01262     // find the values for the specified indices
01263     // if the row is not ours, throw an exception
01264     // ignore values not in the matrix (indices not found)
01265     // operate whether indices are local or global
01266     const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
01267     TEST_FOR_EXCEPTION(values.size() != indices.size(), std::runtime_error,
01268         Teuchos::typeName(*this) << "::replaceGlobalValues(): values.size() must equal indices.size().");
01269     typename Teuchos::ArrayView<const GlobalOrdinal>::iterator ind = indices.begin();
01270     typename Teuchos::ArrayView<const        Scalar>::iterator val = values.begin();
01271     LocalOrdinal lrow = getRowMap()->getLocalElement(globalRow);
01272     TEST_FOR_EXCEPTION(lrow == LOT::invalid(), std::runtime_error,
01273         Teuchos::typeName(*this) << "::replaceGlobalValues(): specified global row does not belong to this processor.");
01274     //
01275     if (valuesAreAllocated_ == false) {
01276       // allocate graph, so we can access views below
01277       if (graph_->indicesAreAllocated() == false) {
01278         graph_->allocateIndices( CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::AllocateGlobal );
01279       }
01280       // graph must be allocated before matrix, because matrix needs to know the total allocation of the graph
01281       allocateValues();
01282 #ifdef HAVE_TPETRA_DEBUG
01283       TEST_FOR_EXCEPTION(valuesAreAllocated_ == false || graph_->indicesAreAllocated() == false, std::logic_error, 
01284           Teuchos::typeName(*this) << "::insertLocalValues(): Internal logic error. Please contact Tpetra team.");
01285 #endif
01286     }
01287     // 
01288     if (isLocallyIndexed() == true) {
01289       ArrayRCP<const LocalOrdinal> lindrowview;
01290       RowInfo sizeInfo = graph_->getFullLocalView(lrow, lindrowview);
01291       ArrayRCP<Scalar> valrowview = getFullViewNonConst(lrow, sizeInfo);
01292       while (ind != indices.end()) {
01293         LocalOrdinal lind = getColMap()->getLocalElement(*ind);
01294         size_t loc = graph_->findLocalIndex(lrow,lind,lindrowview);
01295         if (loc != STINV) {
01296           valrowview[loc] = (*val);
01297         }
01298         ++ind;
01299         ++val;
01300       }
01301       valrowview = Teuchos::null;
01302       lindrowview = Teuchos::null;
01303     }
01304     else if (isGloballyIndexed() == true) {
01305       ArrayRCP<const GlobalOrdinal> gindrowview;
01306       RowInfo sizeInfo = graph_->getFullGlobalView(lrow, gindrowview);
01307       ArrayRCP<Scalar> valrowview = getFullViewNonConst(lrow, sizeInfo);
01308       while (ind != indices.end()) {
01309         size_t loc = graph_->findGlobalIndex(lrow,*ind,gindrowview);
01310         if (loc != STINV) {
01311           valrowview[loc] = (*val);
01312         }
01313         ++ind;
01314         ++val;
01315       }
01316       valrowview = Teuchos::null;
01317       gindrowview = Teuchos::null;
01318     }
01319     //else {
01320     // graph indices are not allocated, i.e., are non-existant; nothing to do
01321     //}
01322   }
01323 
01324 
01327   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01328   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::sumIntoGlobalValues(GlobalOrdinal globalRow, 
01329                          const Teuchos::ArrayView<const GlobalOrdinal> &indices,
01330                          const Teuchos::ArrayView<const Scalar>        &values) {
01331     using Teuchos::ArrayRCP;
01332     // find the values for the specified indices
01333     // if the row is not ours, throw an exception
01334     // ignore values not in the matrix (indices not found)
01335     // operate whether indices are local or global
01336     const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
01337     TEST_FOR_EXCEPTION(values.size() != indices.size(), std::runtime_error,
01338         Teuchos::typeName(*this) << "::sumIntoGlobalValues(): values.size() must equal indices.size().");
01339     typename Teuchos::ArrayView<const GlobalOrdinal>::iterator ind = indices.begin();
01340     typename Teuchos::ArrayView<const        Scalar>::iterator val = values.begin();
01341     LocalOrdinal lrow = getRowMap()->getLocalElement(globalRow);
01342     TEST_FOR_EXCEPTION(lrow == LOT::invalid(), std::runtime_error,
01343         Teuchos::typeName(*this) << "::sumIntoGlobalValues(): specified global row does not belong to this processor.");
01344     //
01345     if (valuesAreAllocated_ == false) {
01346       // allocate graph, so we can access views below
01347       if (graph_->indicesAreAllocated() == false) {
01348         graph_->allocateIndices( CrsGraph<LocalOrdinal,GlobalOrdinal,Node>::AllocateGlobal );
01349       }
01350       // graph must be allocated before matrix, because matrix needs to know the total allocation of the graph
01351       allocateValues();
01352 #ifdef HAVE_TPETRA_DEBUG
01353       TEST_FOR_EXCEPTION(valuesAreAllocated_ == false || graph_->indicesAreAllocated() == false, std::logic_error, 
01354           Teuchos::typeName(*this) << "::insertLocalValues(): Internal logic error. Please contact Tpetra team.");
01355 #endif
01356     }
01357     //
01358     if (isLocallyIndexed() == true) {
01359       ArrayRCP<const LocalOrdinal> lindrowview;
01360       RowInfo sizeInfo = graph_->getFullLocalView(lrow, lindrowview);
01361       ArrayRCP<Scalar> valrowview = getFullViewNonConst(lrow, sizeInfo);
01362       while (ind != indices.end()) {
01363         LocalOrdinal lind = getColMap()->getLocalElement(*ind);
01364         size_t loc = graph_->findLocalIndex(lrow,lind,lindrowview);
01365         if (loc != STINV) {
01366           valrowview[loc] += (*val);
01367         }
01368         ++ind;
01369         ++val;
01370       }
01371       valrowview = Teuchos::null;
01372       lindrowview = Teuchos::null;
01373     }
01374     else if (isGloballyIndexed() == true) {
01375       ArrayRCP<const GlobalOrdinal> gindrowview;
01376       RowInfo sizeInfo = graph_->getFullGlobalView(lrow, gindrowview);
01377       ArrayRCP<Scalar> valrowview = getFullViewNonConst(lrow, sizeInfo);
01378       while (ind != indices.end()) {
01379         size_t loc = graph_->findGlobalIndex(lrow,*ind,gindrowview);
01380         if (loc != STINV) {
01381           valrowview[loc] += (*val);
01382         }
01383         ++ind;
01384         ++val;
01385       }
01386       valrowview = Teuchos::null;
01387       gindrowview = Teuchos::null;
01388     }
01389     //else {
01390       // indices are not allocated; nothing to do
01391     //}
01392   }
01393 
01394 
01397   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01398   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getLocalRowCopy(
01399                                 LocalOrdinal LocalRow, 
01400                                 const Teuchos::ArrayView<LocalOrdinal> &indices, 
01401                                 const Teuchos::ArrayView<Scalar>       &values,
01402                                 size_t &numEntries) const {
01403     using Teuchos::ArrayRCP;
01404     TEST_FOR_EXCEPTION(isGloballyIndexed()==true && hasColMap()==false, std::runtime_error,
01405         Teuchos::typeName(*this) << "::getLocalRowCopy(): local indices cannot be produced.");
01406     TEST_FOR_EXCEPTION(getRowMap()->isNodeLocalElement(LocalRow) == false, std::runtime_error,
01407         Teuchos::typeName(*this) << "::getLocalRowCopy(LocalRow,...): specified row (==" << LocalRow << ") is not valid on this node.");
01408     if (graph_->isLocallyIndexed()) {
01409       ArrayRCP<const LocalOrdinal> indrowview;
01410       ArrayRCP<const Scalar>       valrowview; 
01411       RowInfo sizeInfo = graph_->getFullLocalView(LocalRow, indrowview);
01412       valrowview = getFullView(LocalRow, sizeInfo);
01413       numEntries = sizeInfo.numEntries;
01414       TEST_FOR_EXCEPTION(static_cast<size_t>(indices.size()) < numEntries || static_cast<size_t>(values.size()) < numEntries, std::runtime_error, 
01415           Teuchos::typeName(*this) << "::getLocalRowCopy(LocalRow,indices,values): size of indices,values must be sufficient to store the specified row.");
01416       if (numEntries > 0) {
01417         std::copy( indrowview.begin(), indrowview.begin() + numEntries, indices.begin() );
01418         std::copy( valrowview.begin(), valrowview.begin() + numEntries, values.begin() );
01419       }
01420       valrowview = Teuchos::null;
01421       indrowview = Teuchos::null;
01422     }
01423     else if (graph_->isGloballyIndexed()) {
01424       ArrayRCP<const GlobalOrdinal> indrowview;
01425       ArrayRCP<const Scalar>        valrowview; 
01426       RowInfo sizeInfo = graph_->getFullGlobalView(LocalRow, indrowview);
01427       valrowview = getFullView(LocalRow, sizeInfo);
01428       numEntries = sizeInfo.numEntries;
01429       TEST_FOR_EXCEPTION(static_cast<size_t>(indices.size()) < numEntries || static_cast<size_t>(values.size()) < numEntries, std::runtime_error, 
01430           Teuchos::typeName(*this) << "::getLocalRowCopy(LocalRow,indices,values): size of indices,values must be sufficient to store the specified row.");
01431       if (numEntries > 0) {
01432         std::copy( valrowview.begin(), valrowview.begin() + numEntries, values.begin() );
01433       }
01434       for (size_t j=0; j < numEntries; ++j) {
01435         indices[j] = getColMap()->getLocalElement(indrowview[j]);
01436       }
01437       valrowview = Teuchos::null;
01438       indrowview = Teuchos::null;
01439     }
01440     else {
01441 #ifdef HAVE_TPETRA_DEBUG
01442       // should have fallen in one of the above
01443       TEST_FOR_EXCEPTION( valuesAreAllocated_ == true, std::logic_error, 
01444           Teuchos::typeName(*this) << "::getLocalRowCopy(): Internal logic error. Please contact Tpetra team.");
01445 #endif
01446       numEntries = 0;
01447     }
01448   }
01449 
01450 
01453   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01454   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalRowCopy(
01455                                 GlobalOrdinal globalRow, 
01456                                 const Teuchos::ArrayView<GlobalOrdinal> &indices,
01457                                 const Teuchos::ArrayView<Scalar>        &values,
01458                                 size_t &numEntries) const {
01459     using Teuchos::ArrayRCP;
01460     // Only locally owned rows can be queried, otherwise complain
01461     LocalOrdinal myRow = getRowMap()->getLocalElement(globalRow);
01462     TEST_FOR_EXCEPTION(myRow == LOT::invalid(), std::runtime_error,
01463         Teuchos::typeName(*this) << "::getGlobalRowCopy(globalRow,...): globalRow does not belong to this node.");
01464     if (graph_->isGloballyIndexed()) {
01465       ArrayRCP<const GlobalOrdinal> indrowview;
01466       ArrayRCP<const Scalar>        valrowview; 
01467       RowInfo sizeInfo = graph_->getFullGlobalView(myRow, indrowview);
01468       valrowview = getFullView(myRow, sizeInfo);
01469       numEntries = sizeInfo.numEntries;
01470       TEST_FOR_EXCEPTION(static_cast<size_t>(indices.size()) < numEntries || static_cast<size_t>(values.size()) < numEntries, std::runtime_error, 
01471           Teuchos::typeName(*this) << "::getGlobalRowCopy(GlobalRow,indices,values): size of indices,values must be sufficient to store the specified row.");
01472       if (numEntries > 0) {
01473         std::copy( indrowview.begin(), indrowview.begin() + numEntries, indices.begin() );
01474         std::copy( valrowview.begin(), valrowview.begin() + numEntries, values.begin() );
01475       }
01476       valrowview = Teuchos::null;
01477       indrowview = Teuchos::null;
01478     }
01479     else if (graph_->isLocallyIndexed()) {
01480       ArrayRCP<const LocalOrdinal> indrowview;
01481       ArrayRCP<const Scalar>       valrowview; 
01482       RowInfo sizeInfo = graph_->getFullLocalView(myRow, indrowview);
01483       valrowview = getFullView(myRow, sizeInfo);
01484       numEntries = sizeInfo.numEntries;
01485       TEST_FOR_EXCEPTION(static_cast<size_t>(indices.size()) < numEntries || static_cast<size_t>(values.size()) < numEntries, std::runtime_error, 
01486           Teuchos::typeName(*this) << "::getGlobalRowCopy(GlobalRow,indices,values): size of indices,values must be sufficient to store the specified row.");
01487       if (numEntries > 0) {
01488         std::copy( valrowview.begin(), valrowview.begin() + numEntries, values.begin() );
01489       }
01490       for (size_t j=0; j < numEntries; ++j) {
01491         indices[j] = getColMap()->getGlobalElement(indrowview[j]);
01492       }
01493       valrowview = Teuchos::null;
01494       indrowview = Teuchos::null;
01495     }
01496     else {
01497 #ifdef HAVE_TPETRA_DEBUG
01498       // should have fallen in one of the above
01499       TEST_FOR_EXCEPTION( valuesAreAllocated_ == true, std::logic_error, 
01500           Teuchos::typeName(*this) << "::getGlobalRowCopy(): Internal logic error. Please contact Tpetra team.");
01501 #endif
01502       numEntries = 0;
01503     }
01504   }
01505 
01506 
01509   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01510   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getGlobalRowView(
01511                                 GlobalOrdinal GlobalRow, 
01512                                 Teuchos::ArrayRCP<const GlobalOrdinal> &indices,
01513                                 Teuchos::ArrayRCP<const Scalar>        &values) const {
01514     TEST_FOR_EXCEPTION(isLocallyIndexed() == true, std::runtime_error,
01515         Teuchos::typeName(*this) << "::getGlobalRowView(): global indices do not exist; call getLocalRowView().");
01516     LocalOrdinal lrow = getRowMap()->getLocalElement(GlobalRow);
01517     TEST_FOR_EXCEPTION(lrow == LOT::invalid(), std::runtime_error,
01518         Teuchos::typeName(*this) << "::getGlobalRowView(GlobalRow,...): GlobalRow (== " << GlobalRow << ") does not belong to this node.");
01519     RowInfo sizeInfo = graph_->getFullGlobalView(lrow,indices);
01520     values = getFullView(lrow,sizeInfo);
01521     if (sizeInfo.numEntries != sizeInfo.allocSize) {
01522 #ifdef HAVE_TPETRA_DEBUG
01523       TEST_FOR_EXCEPTION( indices == Teuchos::null && values != Teuchos::null, std::logic_error, 
01524           Teuchos::typeName(*this) << "::getGlobalRowView(): Internal logic error. Please contact Tpetra team.");
01525 #endif
01526       if (indices != Teuchos::null) {
01527         indices = indices.persistingView(0,sizeInfo.numEntries);
01528         if (values != Teuchos::null) {
01529           values  =  values.persistingView(0,sizeInfo.numEntries);
01530         }
01531       }
01532     }
01533     return;
01534   }
01535 
01536 
01539   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01540   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getLocalRowView(
01541                                 LocalOrdinal LocalRow, 
01542                                 Teuchos::ArrayRCP<const LocalOrdinal> &indices,
01543                                 Teuchos::ArrayRCP<const Scalar>        &values) const {
01544     TEST_FOR_EXCEPTION(isGloballyIndexed() == true, std::runtime_error,
01545         Teuchos::typeName(*this) << "::getLocalRowView(): local indices do not exist; call getGlobalRowView().");
01546     TEST_FOR_EXCEPTION(getRowMap()->isNodeLocalElement(LocalRow) == false, std::runtime_error,
01547         Teuchos::typeName(*this) << "::getLocalRowView(LocalRow,...): LocalRow (== " << LocalRow << ") is not valid on this node.");
01548     RowInfo sizeInfo = graph_->getFullLocalView(LocalRow,indices);
01549     values = getFullView(LocalRow,sizeInfo);
01550     if (sizeInfo.numEntries != sizeInfo.allocSize) {
01551 #ifdef HAVE_TPETRA_DEBUG
01552       TEST_FOR_EXCEPTION( indices == Teuchos::null && values != Teuchos::null, std::logic_error, 
01553           Teuchos::typeName(*this) << "::getLocalRowView(): Internal logic error. Please contact Tpetra team.");
01554 #endif
01555       if (indices != Teuchos::null) {
01556         indices = indices.persistingView(0,sizeInfo.numEntries);
01557         if (values != Teuchos::null) {
01558           values  =  values.persistingView(0,sizeInfo.numEntries);
01559         }
01560       }
01561     }
01562     return;
01563   }
01564 
01565 
01568   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01569   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::scale(const Scalar &alpha) {
01570     // scale all values in the matrix
01571     // it is easiest to scale all allocated values, instead of scaling only the ones with valid entries
01572     // however, if there are no valid entries, we can short-circuit
01573     // furthermore, if the values aren't allocated, we can short-circuit (unallocated values are zero, scaling to zero)
01574     const size_t     nlrs = graph_->getNodeNumRows(),
01575                  numAlloc = graph_->getNodeAllocationSize(),
01576                numEntries = graph_->getNodeNumEntries();
01577     if (valuesAreAllocated_ == false || numAlloc == 0 || numEntries == 0) {
01578       // do nothing
01579     }
01580     else {
01581       Teuchos::RCP<Node> node = lclMatrix_.getNode();
01582       Kokkos::ReadyBufferHelper<Node> rbh(node);
01583       ScaleOp<Scalar> wdp;
01584       wdp.val = alpha;
01585       if (graph_->getProfileType() == StaticProfile) {
01586         rbh.begin();
01587         wdp.x = rbh.addNonConstBuffer(pbuf_values1D_);
01588         rbh.end();
01589         node->template parallel_for<ScaleOp<Scalar> >(0,numAlloc,wdp);
01590       }
01591       else if (graph_->getProfileType() == DynamicProfile) {
01592         for (size_t row=0; row < nlrs; ++row) {
01593           if (pbuf_values2D_[row] != Teuchos::null) {
01594             rbh.begin();
01595             wdp.x = rbh.addNonConstBuffer(pbuf_values2D_[row]);
01596             rbh.end();
01597             node->template parallel_for<ScaleOp<Scalar> >(0,pbuf_values2D_[row].size(),wdp);
01598           }
01599         }
01600       }
01601     }
01602   }
01603 
01604 
01607   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01608   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::setAllToScalar(const Scalar &alpha) {
01609     // set all values in the matrix
01610     // it is easiest to set all allocated values, instead of setting only the ones with valid entries
01611     // however, if there are no valid entries, we can short-circuit
01612     // we must allocate, though we can set the values in the allocateValues() call.
01613     // this method is equivalent replacing all valid entries with alpha.
01614     const size_t     nlrs = graph_->getNodeNumRows(),
01615                  numAlloc = graph_->getNodeAllocationSize(),
01616                numEntries = graph_->getNodeNumEntries();
01617     if (numEntries == 0) {
01618       // do nothing
01619     }
01620     else if (valuesAreAllocated_) {
01621       // numEntries > 0, so graph must already be allocated; allocatedValues() will verify this
01622       allocateValues(alpha);
01623     }
01624     else {
01625       Teuchos::RCP<Node> node = lclMatrix_.getNode();
01626       Kokkos::ReadyBufferHelper<Node> rbh(node);
01627       InitOp<Scalar> wdp;
01628       wdp.val = alpha;
01629       if (graph_->getProfileType() == StaticProfile) {
01630         rbh.begin();
01631         wdp.x = rbh.addNonConstBuffer(pbuf_values1D_);
01632         rbh.end();
01633         node->template parallel_for<InitOp<Scalar> >(0,numAlloc,wdp);
01634       }
01635       else if (graph_->getProfileType() == DynamicProfile) {
01636         for (size_t row=0; row < nlrs; ++row) {
01637           if (pbuf_values2D_[row] != Teuchos::null) {
01638             rbh.begin();
01639             wdp.x = rbh.addNonConstBuffer(pbuf_values2D_[row]);
01640             rbh.end();
01641             node->template parallel_for<InitOp<Scalar> >(0,pbuf_values2D_[row].size(),wdp);
01642           }
01643         }
01644       }
01645     }
01646   }
01647 
01648 
01651   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01652   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getLocalDiagCopy(Vector<Scalar,LocalOrdinal,GlobalOrdinal> &dvec) const {
01653     TEST_FOR_EXCEPTION(isFillComplete() == false, std::runtime_error,
01654         Teuchos::typeName(*this) << ": cannot call getLocalDiagCopy() until fillComplete() has been called.");
01655     TEST_FOR_EXCEPTION(dvec.getMap()->isSameAs(*getRowMap()) == false, std::runtime_error,
01656         Teuchos::typeName(*this) << "::getLocalDiagCopy(dvec): dvec must have the same map as the CrsMatrix.");
01657     const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid();
01658 #ifdef HAVE_TPETRA_DEBUG
01659     size_t numDiagFound = 0;
01660 #endif
01661     const size_t nlrs = getNodeNumRows();
01662     Teuchos::ArrayRCP<Scalar> vecView = dvec.get1dViewNonConst();
01663     for (size_t r=0; r < nlrs; ++r) {
01664       vecView[r] = Teuchos::ScalarTraits<Scalar>::zero();
01665       GlobalOrdinal rgid = getRowMap()->getGlobalElement(r);
01666       if (getColMap()->isNodeGlobalElement(rgid)) {
01667         LocalOrdinal rlid = getColMap()->getLocalElement(rgid);
01668         Teuchos::ArrayRCP<const LocalOrdinal> inds;
01669         RowInfo sizeInfo = graph_->getFullLocalView(r,inds);
01670         if (sizeInfo.numEntries > 0) {
01671           const size_t j = graph_->findLocalIndex(r, rlid, inds);
01672           if (j != STINV) {
01673             Teuchos::ArrayRCP<const Scalar> vals;
01674             vals = getFullView(r,sizeInfo);
01675             vecView[r] = vals[j];
01676             vals = Teuchos::null;
01677 #ifdef HAVE_TPETRA_DEBUG
01678             ++numDiagFound;
01679 #endif
01680           }
01681         }
01682         inds = Teuchos::null;
01683       }
01684     }
01685     vecView = Teuchos::null;
01686 #ifdef HAVE_TPETRA_DEBUG
01687     TEST_FOR_EXCEPTION(numDiagFound != getNodeNumDiags(), std::logic_error, 
01688         "CrsMatrix::getLocalDiagCopy(): logic error. Please contact Tpetra team.");
01689 #endif
01690   }
01691 
01692 
01695   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01696   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::globalAssemble() {
01697     using Teuchos::arcp;
01698     using Teuchos::OrdinalTraits;
01699     using Teuchos::Array;
01700     using Teuchos::SerialDenseMatrix;
01701     using Teuchos::ArrayView;
01702     using Teuchos::ArrayRCP;
01703     using Teuchos::rcp;
01704     using std::list;
01705     using std::pair;
01706     using std::make_pair;
01707     using Teuchos::tuple;
01708     typedef typename std::map<GlobalOrdinal,std::list<pair<GlobalOrdinal,Scalar> > >::const_iterator NLITER;
01709     int numImages = getComm()->getSize();
01710     int myImageID = getComm()->getRank();
01711     // Determine if any nodes have global entries to share
01712     size_t MyNonlocals = nonlocals_.size(), 
01713            MaxGlobalNonlocals;
01714     Teuchos::reduceAll<int,size_t>(*getComm(),Teuchos::REDUCE_MAX,MyNonlocals,&MaxGlobalNonlocals);
01715     if (MaxGlobalNonlocals == 0) return;  // no entries to share
01716 
01717     // compute a list of NLRs from nonlocals_ and use it to compute:
01718     //      IdsAndRows: a vector of (id,row) pairs
01719     //          NLR2Id: a map from NLR to the Id that owns it
01720     // globalNeighbors: a global graph of connectivity between images: globalNeighbors(i,j) indicates that j sends to i
01721     //         sendIDs: a list of all images I send to
01722     //         recvIDs: a list of all images I receive from (constructed later)
01723     Array<pair<int,GlobalOrdinal> > IdsAndRows;
01724     std::map<GlobalOrdinal,int> NLR2Id;
01725     SerialDenseMatrix<int,char> globalNeighbors;
01726     Array<int> sendIDs, recvIDs;
01727     {
01728       // nonlocals_ contains the entries we are holding for all non-local rows
01729       // we want a list of the rows for which we have data
01730       Array<GlobalOrdinal> NLRs;
01731       std::set<GlobalOrdinal> setOfRows;
01732       for (NLITER iter = nonlocals_.begin(); iter != nonlocals_.end(); ++iter)
01733       {
01734         setOfRows.insert(iter->first);
01735       }
01736       // copy the elements in the set into an Array
01737       NLRs.resize(setOfRows.size());
01738       std::copy(setOfRows.begin(), setOfRows.end(), NLRs.begin());
01739 
01740       // get a list of ImageIDs for the non-local rows (NLRs)
01741       Array<int> NLRIds(NLRs.size());
01742       {
01743         LookupStatus stat = getRowMap()->getRemoteIndexList(NLRs(),NLRIds());
01744         char lclerror = ( stat == IDNotPresent ? 1 : 0 );
01745         char gblerror;
01746         Teuchos::reduceAll(*getComm(),Teuchos::REDUCE_MAX,lclerror,&gblerror);
01747         TEST_FOR_EXCEPTION(gblerror, std::runtime_error,
01748             Teuchos::typeName(*this) << "::globalAssemble(): non-local entries correspond to invalid rows.");
01749       }
01750 
01751       // build up a list of neighbors, as well as a map between NLRs and Ids
01752       // localNeighbors[i] != 0 iff I have data to send to image i
01753       // put NLRs,Ids into an array of pairs
01754       IdsAndRows.reserve(NLRs.size());
01755       Array<char> localNeighbors(numImages,0);
01756       typename Array<GlobalOrdinal>::const_iterator nlr;
01757       typename Array<int>::const_iterator id;
01758       for (nlr = NLRs.begin(), id = NLRIds.begin();
01759            nlr != NLRs.end(); ++nlr, ++id) 
01760       {
01761         NLR2Id[*nlr] = *id;
01762         localNeighbors[*id] = 1;
01763         IdsAndRows.push_back(make_pair<int,GlobalOrdinal>(*id,*nlr));
01764       }
01765       for (int j=0; j<numImages; ++j)
01766       {
01767         if (localNeighbors[j]) {
01768           sendIDs.push_back(j);
01769         }
01770       }
01771       // sort IdsAndRows, by Ids first, then rows
01772       std::sort(IdsAndRows.begin(),IdsAndRows.end());
01773       // gather from other nodes to form the full graph
01774       globalNeighbors.shapeUninitialized(numImages,numImages);
01775       Teuchos::gatherAll(*getComm(),numImages,localNeighbors.getRawPtr(),numImages*numImages,globalNeighbors.values());
01776       // globalNeighbors at this point contains (on all images) the
01777       // connectivity between the images. 
01778       // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j
01779     }
01780 
01782     // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH
01783     // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID
01785 
01786     // loop over all columns to know from which images I can expect to receive something
01787     for (int j=0; j<numImages; ++j)
01788     {
01789       if (globalNeighbors(myImageID,j)) {
01790         recvIDs.push_back(j);
01791       }
01792     }
01793     size_t numRecvs = recvIDs.size();
01794 
01795     // we know how many we're sending to already
01796     // form a contiguous list of all data to be sent
01797     // track the number of entries for each ID
01798     Array<CrsIJV<GlobalOrdinal,Scalar> > IJVSendBuffer;
01799     Array<size_t> sendSizes(sendIDs.size(), 0);
01800     size_t numSends = 0;
01801     for (typename Array<pair<int,GlobalOrdinal> >::const_iterator IdAndRow = IdsAndRows.begin();
01802          IdAndRow != IdsAndRows.end(); ++IdAndRow) 
01803     {
01804       int            id = IdAndRow->first;
01805       GlobalOrdinal row = IdAndRow->second;
01806       // have we advanced to a new send?
01807       if (sendIDs[numSends] != id) {
01808         numSends++;
01809         TEST_FOR_EXCEPTION(sendIDs[numSends] != id, std::logic_error, Teuchos::typeName(*this) << "::globalAssemble(): internal logic error. Contact Tpetra team.");
01810       }
01811       // copy data for row into contiguous storage
01812       for (typename list<pair<GlobalOrdinal,Scalar> >::const_iterator jv = nonlocals_[row].begin(); jv != nonlocals_[row].end(); ++jv)
01813       {
01814         IJVSendBuffer.push_back( CrsIJV<GlobalOrdinal,Scalar>(row,jv->first,jv->second) );
01815         sendSizes[numSends]++;
01816       }
01817     }
01818     if (IdsAndRows.size() > 0) {
01819       numSends++; // one last increment, to make it a count instead of an index
01820     }
01821     TEST_FOR_EXCEPTION(Teuchos::as<typename Array<int>::size_type>(numSends) != sendIDs.size(), std::logic_error, Teuchos::typeName(*this) << "::globalAssemble(): internal logic error. Contact Tpetra team.");
01822 
01823     // don't need this data anymore
01824     nonlocals_.clear();
01825 
01827     // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS
01829     // perform non-blocking sends: send sizes to our recipients
01830     Array<Teuchos::RCP<Teuchos::CommRequest> > sendRequests;
01831     for (size_t s=0; s < numSends ; ++s) {
01832       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
01833       sendRequests.push_back( Teuchos::isend<int,size_t>(*getComm(),Teuchos::rcpFromRef(sendSizes[s]),sendIDs[s]) );
01834     }
01835     // perform non-blocking receives: receive sizes from our senders
01836     Array<Teuchos::RCP<Teuchos::CommRequest> > recvRequests;
01837     Array<size_t> recvSizes(numRecvs);
01838     for (size_t r=0; r < numRecvs; ++r) {
01839       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
01840       recvRequests.push_back( Teuchos::ireceive(*getComm(),rcp(&recvSizes[r],false),recvIDs[r]) );
01841     }
01842     // wait on all 
01843     if (!sendRequests.empty()) {
01844       Teuchos::waitAll(*getComm(),sendRequests());
01845     }
01846     if (!recvRequests.empty()) {
01847       Teuchos::waitAll(*getComm(),recvRequests());
01848     }
01849     Teuchos::barrier(*getComm());
01850     sendRequests.clear();
01851     recvRequests.clear();
01852 
01854     // NOW SEND/RECEIVE ALL ROW DATA
01856     // from the size info, build the ArrayViews into IJVSendBuffer
01857     Array<ArrayView<CrsIJV<GlobalOrdinal,Scalar> > > sendBuffers(numSends,Teuchos::null);
01858     {
01859       size_t cur = 0;
01860       for (size_t s=0; s<numSends; ++s) {
01861         sendBuffers[s] = IJVSendBuffer(cur,sendSizes[s]);
01862         cur += sendSizes[s];
01863       }
01864     }
01865     // perform non-blocking sends
01866     for (size_t s=0; s < numSends ; ++s)
01867     {
01868       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
01869       ArrayRCP<CrsIJV<GlobalOrdinal,Scalar> > tmparcp = arcp(sendBuffers[s].getRawPtr(),0,sendBuffers[s].size(),false);
01870       sendRequests.push_back( Teuchos::isend<int,CrsIJV<GlobalOrdinal,Scalar> >(*getComm(),tmparcp,sendIDs[s]) );
01871     }
01872     // calculate amount of storage needed for receives
01873     // setup pointers for the receives as well
01874     size_t totalRecvSize = std::accumulate(recvSizes.begin(),recvSizes.end(),0);
01875     Array<CrsIJV<GlobalOrdinal,Scalar> > IJVRecvBuffer(totalRecvSize);
01876     // from the size info, build the ArrayViews into IJVRecvBuffer
01877     Array<ArrayView<CrsIJV<GlobalOrdinal,Scalar> > > recvBuffers(numRecvs,Teuchos::null);
01878     {
01879       size_t cur = 0;
01880       for (size_t r=0; r<numRecvs; ++r) {
01881         recvBuffers[r] = IJVRecvBuffer(cur,recvSizes[r]);
01882         cur += recvSizes[r];
01883       }
01884     }
01885     // perform non-blocking recvs
01886     for (size_t r=0; r < numRecvs ; ++r)
01887     {
01888       // we'll fake the memory management, because all communication will be local to this method and the scope of our data
01889       ArrayRCP<CrsIJV<GlobalOrdinal,Scalar> > tmparcp = arcp(recvBuffers[r].getRawPtr(),0,recvBuffers[r].size(),false);
01890       recvRequests.push_back( Teuchos::ireceive(*getComm(),tmparcp,recvIDs[r]) );
01891     }
01892     // perform waits
01893     if (!sendRequests.empty()) {
01894       Teuchos::waitAll(*getComm(),sendRequests());
01895     }
01896     if (!recvRequests.empty()) {
01897       Teuchos::waitAll(*getComm(),recvRequests());
01898     }
01899     Teuchos::barrier(*getComm());
01900     sendRequests.clear();
01901     recvRequests.clear();
01902 
01903 
01905     // NOW PROCESS THE RECEIVED ROW DATA
01907     // TODO: instead of adding one entry at a time, add one row at a time.
01908     //       this requires resorting; they arrived sorted by sending node, so that entries could be non-contiguous if we received
01909     //       multiple entries for a particular row from different processors.
01910     //       it also requires restoring the data, which may make it not worth the trouble.
01911     for (typename Array<CrsIJV<GlobalOrdinal,Scalar> >::const_iterator ijv = IJVRecvBuffer.begin(); ijv != IJVRecvBuffer.end(); ++ijv)
01912     {
01913       try {
01914         insertGlobalValues(ijv->i, tuple(ijv->j), tuple(ijv->v));
01915       }
01916       catch (std::runtime_error &e) {
01917         std::ostringstream omsg;
01918         omsg << e.what() << std::endl
01919           << "caught in globalAssemble() in " << __FILE__ << ":" << __LINE__ << std::endl ;
01920         throw std::runtime_error(omsg.str());
01921       }
01922     }
01923 
01924     // WHEW! THAT WAS TIRING!
01925   }
01926 
01927 
01930   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01931   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::fillComplete(OptimizeOption os) {
01932     fillComplete(getRowMap(),getRowMap(),os);
01933   }
01934 
01935 
01938   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01939   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::fillComplete(
01940                                             const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &domainMap, 
01941                                             const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &rangeMap, 
01942                                             OptimizeOption os) {
01943     if (getComm()->getSize() > 1) {
01944       globalAssemble();
01945     }
01946     else {
01947       TEST_FOR_EXCEPTION(nonlocals_.size() > 0, std::runtime_error,
01948           Teuchos::typeName(*this) << "::fillComplete(): cannot have non-local entries on a serial run. Invalid entry was submitted to the CrsMatrix.");
01949     }
01950 
01951     if (graph_->isFillComplete()) {
01952       // if the graph is filled, it should be because 
01953       // - it was given at construction, already filled
01954       // - it was filled on a previous call to fillComplete()
01955       // if neither, it means that the graph has been filled by the user.
01956       // this means that a graph passed at construction has been filled in the interim.
01957       // in this case, indices have been sorted and merged, so that we are no longer able
01958       // to sort or merge the associated values. nothing we can do here.
01959       if ((constructedWithFilledGraph_ || isFillComplete()) == false) {
01960         TPETRA_ABUSE_WARNING(true,std::runtime_error,
01961             "::fillComplete(): fillComplete() has been called on graph since matrix was constructed.");
01962         return;
01963       }
01964     }
01965 
01966     if (isStaticGraph() == false) {
01967       graph_->makeIndicesLocal(domainMap,rangeMap);
01968     }
01969     sortEntries();
01970     mergeRedundantEntries();
01971     if (isStaticGraph() == false) {
01972       // can't optimize graph storage before optimizing our storage
01973       graph_->fillComplete(domainMap,rangeMap,DoNotOptimizeStorage);
01974     }
01975 
01976     fillComplete_ = true;
01977 
01978     if (os == DoOptimizeStorage) {
01979       // this will call optimizeStorage() on the graph as well, if isStaticGraph() == false
01980       // these routines will fill the local graph and local matrix
01981       optimizeStorage();
01982     }
01983     else { 
01984       // local graph already filled.
01985       // fill the local matrix.
01986       fillLocalMatrix();
01987     }
01988 
01989     checkInternalState();
01990   }
01991 
01992 
01995   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
01996   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::sortEntries() {
01997     using Teuchos::ArrayRCP;
01998     TEST_FOR_EXCEPTION(graph_->isGloballyIndexed() == true, std::logic_error,
01999         Teuchos::typeName(*this) << "::sortEntries(): sortEntries() must be called after indices are transformed to local.\n"
02000         << "Likely internal logic error. Please contact Tpetra team.");
02001     if (graph_->isSorted()) return;
02002     if (graph_->getNodeAllocationSize()) {
02003       const size_t nlrs = getNodeNumRows();
02004       for (size_t r=0; r < nlrs; ++r) {
02005         // TODO: This is slightly inefficient, because it may query pbuf_rowOffsets_ repeatadly. 
02006         //       However, it is very simple code. Consider rewriting it.
02007         Teuchos::ArrayRCP<LocalOrdinal> inds;
02008         Teuchos::ArrayRCP<Scalar>       vals;
02009         RowInfo sizeInfo = graph_->getFullLocalViewNonConst(r, inds);
02010         vals = getFullViewNonConst(r, sizeInfo);
02011         sort2(inds.begin(), inds.begin() + sizeInfo.numEntries, vals);
02012       }
02013     }
02014     graph_->setSorted(true);  // we just sorted them
02015     return;
02016   }
02017 
02018 
02021   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02022   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::mergeRedundantEntries() {
02023     using Teuchos::ArrayRCP;
02024     TEST_FOR_EXCEPTION(graph_->isSorted() == false, std::runtime_error,
02025         Teuchos::typeName(*this) << "::mergeRedundantEntries() cannot be called before indices are sorted.\n"
02026         << "Likely interal logic error. Please contact Tpetra team.");
02027     if (graph_->notRedundant()) return;
02028     if (graph_->getNodeAllocationSize() > 0) {
02029       for (size_t r=0; r<getNodeNumRows(); ++r) {
02030         // TODO: This is slightly inefficient, because it may query pbuf_rowOffsets_ repeatadly. 
02031         //       However, it is very simple code. Consider rewriting it.
02032         Teuchos::ArrayRCP<LocalOrdinal> inds;
02033         Teuchos::ArrayRCP<Scalar>       vals;
02034         RowInfo sizeInfo = graph_->getFullLocalViewNonConst(r, inds);
02035         vals = getFullViewNonConst(r, sizeInfo);
02036         if (sizeInfo.numEntries > 0) {
02037           size_t curEntry = 0;
02038           Scalar curValue = vals[curEntry];
02039           for (size_t k=1; k < sizeInfo.numEntries; ++k) {
02040             if (inds[k] == inds[k-1]) {
02041               curValue += vals[k];
02042             }
02043             else {
02044               vals[curEntry++] = curValue;
02045               curValue = vals[k];
02046             }
02047           }
02048           vals[curEntry] = curValue;
02049         }
02050         vals = Teuchos::null;
02051         inds = Teuchos::null;
02052       }
02053       graph_->removeRedundantIndices();
02054     }
02055   }
02056 
02057 
02060   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02061   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::optimizeStorage() {
02062     // optimizeStorage will perform two functions:
02063     // 1) create a single allocation of memory
02064     // 2) pack data in that allocation
02065     // if getProfileType() == StaticProfile, then 1) has already been done
02066 #ifdef HAVE_TPETRA_DEBUG
02067     std::string err = Teuchos::typeName(*this) + "::optimizeStorage(): Internal logic error. Please contact Tpetra team.";
02068 #endif
02069     if (isStorageOptimized() == true) return;
02070     TEST_FOR_EXCEPTION(isFillComplete() == false || graph_->isSorted() == false || graph_->notRedundant() == false, std::runtime_error,
02071         Teuchos::typeName(*this) << "::optimizeStorage(): fillComplete() must be called before optimizeStorage().");
02072     TEST_FOR_EXCEPTION(graph_->isStorageOptimized(), std::logic_error,
02073         Teuchos::typeName(*this) << "::optimizeStorage(): optimizeStorage() already called on graph, but not on matrix.");
02074     // 
02075     Teuchos::RCP<Node> node = lclMatrix_.getNode();
02076     const size_t nlrs = getNodeNumRows(),
02077            numEntries = graph_->getNodeNumEntries();
02078     if (nlrs > 0) {
02079       if (valuesAreAllocated_ == false) {
02080         // haven't even allocated values yet. allocate to optimal size and fill with zero
02081         if (numEntries > 0) {
02082           // allocate
02083           pbuf_values1D_ = node->template allocBuffer<Scalar>(numEntries);        
02084           // init to zero
02085           Kokkos::ReadyBufferHelper<Node> rbh(node);
02086           InitOp<Scalar> wdp;
02087           wdp.val = Teuchos::ScalarTraits<Scalar>::zero();
02088           rbh.begin();
02089           wdp.x   = rbh.addNonConstBuffer(pbuf_values1D_);
02090           rbh.end();
02091           node->template parallel_for<InitOp<Scalar> >(0,numEntries,wdp);
02092         } 
02093         valuesAreAllocated_ = true;
02094       }
02095       else if (graph_->getProfileType() == DynamicProfile) {
02096         // allocate a single memory block
02097         if (numEntries > 0) {
02098           // numEntries > 0  =>  allocSize > 0  =>  pbuf_values2D_ != Teuchos::null
02099 #ifdef HAVE_TPETRA_DEBUG
02100           TEST_FOR_EXCEPTION( pbuf_values2D_ == Teuchos::null, std::logic_error, err);
02101 #endif
02102           pbuf_values1D_ = node->template allocBuffer<Scalar>(numEntries);
02103           size_t sofar = 0;
02104           for (size_t row=0; row<nlrs; ++row) {
02105             RowInfo sizeInfo = graph_->getRowInfo(row);
02106             if (sizeInfo.numEntries > 0) {
02107               node->template copyBuffers<Scalar>( sizeInfo.numEntries, pbuf_values2D_[row], pbuf_values1D_ + sofar );
02108               pbuf_values2D_[row] = Teuchos::null;
02109             }
02110             sofar += sizeInfo.numEntries;
02111           }
02112 #ifdef HAVE_TPETRA_DEBUG
02113           TEST_FOR_EXCEPTION(sofar != numEntries, std::logic_error, err);
02114 #endif
02115         }
02116         pbuf_values2D_ = Teuchos::null;
02117       }
02118       else {
02119         // storage is already allocated; just need to pack
02120         if (numEntries > 0) {
02121 #ifdef HAVE_TPETRA_DEBUG
02122           TEST_FOR_EXCEPTION( pbuf_values1D_ == Teuchos::null, std::logic_error, err);
02123 #endif
02124           size_t sofar = 0;
02125           for (size_t row=0; row<nlrs; ++row) {
02126             RowInfo sizeInfo = graph_->getRowInfo(row);
02127             if (sizeInfo.numEntries > 0 && sizeInfo.offset1D != sofar) {
02128               node->template copyBuffers<Scalar>( sizeInfo.numEntries, 
02129                                                   pbuf_values1D_ + sizeInfo.offset1D,
02130                                                   pbuf_values1D_ + sofar );
02131             }
02132             sofar += sizeInfo.numEntries;
02133           }
02134           pbuf_values1D_ = pbuf_values1D_.persistingView(0,sofar);
02135 #ifdef HAVE_TPETRA_DEBUG
02136           TEST_FOR_EXCEPTION(sofar != numEntries, std::logic_error, err);
02137 #endif
02138         }
02139       }
02140     }
02141     storageOptimized_ = true;
02142 
02143     if (isStaticGraph() == false) {
02144       graph_->optimizeStorage();
02145     }
02146 
02147     // local graph was filled during graph_->optimizeStorage()
02148     fillLocalMatrix(); 
02149   }
02150 
02151 
02154   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02155   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::applyInverse(
02156                                     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 
02157                                           MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 
02158                                           Teuchos::ETransp mode) const {
02159     solve<Scalar,Scalar>(Y,X,mode);
02160   }
02161 
02162 
02165   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02166   template <class DomainScalar, class RangeScalar>
02167   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::solve(
02168                                     const MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>  &Y, 
02169                                           MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X,
02170                                           Teuchos::ETransp mode) const {
02171     // Solve U X = Y  or  L X = Y
02172     // X belongs to domain map, while Y belongs to range map
02173     typedef Teuchos::ScalarTraits<Scalar> ST;
02174     using Teuchos::null;
02175     using Teuchos::ArrayView;
02176     TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 
02177         Teuchos::typeName(*this) << ": cannot call applyInverse() until fillComplete() has been called.");
02178     TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
02179         Teuchos::typeName(*this) << "::applyInverse(X,Y): X and Y must have the same number of vectors.");
02180     TEST_FOR_EXCEPTION(isLowerTriangular() == false && isUpperTriangular() == false, std::runtime_error,
02181         Teuchos::typeName(*this) << "::applyInverser() requires either upper or lower triangular structure.");
02182 
02183 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02184     int myImageID = Teuchos::rank(*getComm());
02185     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
02186     if (myImageID == 0) {
02187       *out << "Entering CrsMatrix::applyInverse()" << std::endl
02188                 << "Column Map: " << std::endl;
02189     }
02190     *out << *this->getColMap() << std::endl;
02191     if (myImageID == 0) {
02192       *out << "Initial input: " << std::endl;
02193     }
02194     Y.describe(*out,Teuchos::VERB_EXTREME);
02195 #endif
02196 
02197     const size_t numVectors = X.getNumVectors();
02198     Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal> > importer = graph_->getImporter();
02199     Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal> > exporter = graph_->getExporter();
02200     Teuchos::RCP<MV>        Xcopy;
02201     Teuchos::RCP<const MV>  Ycopy;
02202     Kokkos::MultiVector<Scalar,Node>        *lclX = &X.getLocalMVNonConst();
02203     const Kokkos::MultiVector<Scalar,Node>  *lclY = &Y.getLocalMV();
02204 
02205     // it is okay if X and Y reference the same data, because we can perform a triangular solve in-situ.
02206     // however, we require that column access to each is strided.
02207 
02208     // cannot handle non-constant stride right now
02209     if (X.isConstantStride() == false) {
02210       // generate a copy of X 
02211       Xcopy = Teuchos::rcp(new MV(X));
02212       lclX = &Xcopy->getLocalMVNonConst();
02213 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02214       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
02215       Xcopy->describe(*out,Teuchos::VERB_EXTREME);
02216 #endif
02217     }
02218 
02219     // cannot handle non-constant stride right now
02220     if (Y.isConstantStride() == false) {
02221       // generate a copy of Y 
02222       Ycopy = Teuchos::rcp(new MV(Y));
02223       lclY = &Ycopy->getLocalMV();
02224 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02225       if (myImageID == 0) *out << "Y is not constant stride, duplicating Y results in a strided copy" << std::endl;
02226       Ycopy->describe(*out,Teuchos::VERB_EXTREME);
02227 #endif
02228     }
02229 
02230     if (importer != null) {
02231       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
02232       if (importMV_ == null) {
02233         importMV_ = Teuchos::rcp( new MV(getColMap(),numVectors) );
02234       }
02235     }
02236     if (exporter != null) {
02237       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
02238       if (exportMV_ == null) {
02239         exportMV_ = Teuchos::rcp( new MV(getRowMap(),numVectors) );
02240       }
02241     }
02242 
02243     // import/export patterns are different depending on transpose.
02244     if (mode == Teuchos::NO_TRANS) {
02245       // applyInverse(NO_TRANS): RangeMap -> DomainMap
02246       // lclMatSolve_: RowMap -> ColMap
02247       // importer: DomainMap -> ColMap
02248       // exporter: RowMap -> RangeMap
02249       // 
02250       // applyInverse = reverse(exporter)  o   lclMatSolve_  o reverse(importer)
02251       //                RangeMap   ->    RowMap     ->     ColMap         ->    DomainMap
02252       // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
02253       if (exporter != null) {
02254         exportMV_->doImport(Y, *exporter, INSERT);
02255 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02256         if (myImageID == 0) {
02257           *out << "Performed import of Y using exporter..." << std::endl;
02258         }
02259         exportMV_->describe(*out,Teuchos::VERB_EXTREME);
02260 #endif
02261         lclY = &exportMV_->getLocalMV();
02262       }
02263       // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
02264       // We will compute solution into the to-be-exported MV; get a view
02265       if (importer != null) {
02266         lclX = &importMV_->getLocalMVNonConst();
02267       }
02268       // Do actual computation
02269       Teuchos::EDiag diag = ( getNodeNumDiags() < getNodeNumRows() ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG );
02270       if (isUpperTriangular()) {
02271         lclMatSolve_.template solve<Scalar,Scalar>(Teuchos::NO_TRANS, Teuchos::UPPER_TRI, diag, *lclY, *lclX);
02272       }
02273       else {
02274         lclMatSolve_.template solve<Scalar,Scalar>(Teuchos::NO_TRANS, Teuchos::LOWER_TRI, diag, *lclY, *lclX);
02275       }
02276 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02277       if (myImageID == 0) *out << "Matrix-MV solve..." << std::endl;
02278       if (exporter != null) {
02279         exportMV_->describe(*out,Teuchos::VERB_EXTREME);
02280       } 
02281       else {
02282         X.describe(*out,Teuchos::VERB_EXTREME);
02283       }
02284 #endif
02285       // do the export
02286       if (importer != null) {
02287         // Make sure target is zero: necessary because we are adding. may need adjusting for alpha,beta apply()
02288         X.putScalar(ST::zero());  
02289         X.doExport(*importMV_, *importer, ADD); // Fill Y with Values from export vector
02290 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02291         if (myImageID == 0) *out << "Output vector after export() using exporter..." << std::endl;
02292         X.describe(*out,Teuchos::VERB_EXTREME);
02293 #endif
02294       }
02295       // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
02296       if (X.isDistributed() == false) {
02297         X.reduce();
02298 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02299         if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
02300         X.describe(*out,Teuchos::VERB_EXTREME);
02301 #endif
02302       }
02303     }
02304     else {
02305       // mode == CONJ_TRANS or TRANS
02306       TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
02307           Teuchos::typeName(*this) << "::applyInverse() does not currently support transposed multiplications for complex scalar types.");
02308       // applyInverse(TRANS): DomainMap -> RangeMap
02309       // lclMatSolve_(TRANS): ColMap -> RowMap
02310       // importer: DomainMap -> ColMap
02311       // exporter: RowMap -> RangeMap
02312       // 
02313       // applyInverse =        importer o   lclMatSolve_  o  exporter
02314       //                Domainmap -> ColMap     ->      RowMap -> RangeMap
02315       // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
02316       if (importer != null) {
02317         importMV_->doImport(Y,*importer,INSERT);
02318         lclY = &importMV_->getLocalMV();
02319 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02320         if (myImageID == 0) {
02321           *out << "Performed import of Y using importer..." << std::endl;
02322         }
02323         importMV_->describe(*out,Teuchos::VERB_EXTREME);
02324 #endif
02325       }
02326       // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
02327       // We will compute solution into the to-be-exported MV; get a view
02328       if (exporter != null) {
02329         lclX = &exportMV_->getLocalMVNonConst();
02330       }
02331       // Do actual computation
02332       Teuchos::EDiag diag = ( getNodeNumDiags() < getNodeNumRows() ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG );
02333       if (isUpperTriangular()) {
02334         lclMatSolve_.template solve<Scalar,Scalar>(Teuchos::CONJ_TRANS, Teuchos::UPPER_TRI, diag, *lclY, *lclX);
02335       }
02336       else {
02337         lclMatSolve_.template solve<Scalar,Scalar>(Teuchos::CONJ_TRANS, Teuchos::LOWER_TRI, diag, *lclY, *lclX);
02338       }
02339 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02340       if (myImageID == 0) *out << "Matrix-MV solve..." << std::endl;
02341       if (exporter != null) {
02342         exportMV_->describe(*out,Teuchos::VERB_EXTREME);
02343       } 
02344       else {
02345         X.describe(*out,Teuchos::VERB_EXTREME);
02346       }
02347 #endif
02348       // do the export
02349       if (exporter != null) {
02350         X.putScalar(ST::zero()); // Make sure target is zero: necessary because we are adding. may need adjusting for alpha,beta apply()
02351         X.doExport(*exportMV_,*exporter,ADD);
02352 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02353         if (myImageID == 0) *out << "Output vector after export() using exporter..." << std::endl;
02354         X.describe(*out,Teuchos::VERB_EXTREME);
02355 #endif
02356       }
02357       // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
02358       if (X.isDistributed() == false) {
02359         X.reduce();
02360 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02361         if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
02362         X.describe(*out,Teuchos::VERB_EXTREME);
02363 #endif
02364       }
02365     }
02366   }
02367 
02368 
02371   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02372   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::apply(
02373                                         const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, 
02374                                         MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 
02375                                         Teuchos::ETransp mode) const {
02376     multiply<Scalar,Scalar>(X,Y,mode);
02377   }
02378 
02379 
02382   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02383   template <class DomainScalar, class RangeScalar>
02384   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::multiply(
02385                                         const MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X, 
02386                                         MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, 
02387                                         Teuchos::ETransp mode) const {
02388     typedef Teuchos::ScalarTraits<Scalar> ST;
02389     using Teuchos::null;
02390     using Teuchos::ArrayView;
02391     TEST_FOR_EXCEPTION(!isFillComplete(), std::runtime_error, 
02392         Teuchos::typeName(*this) << ": cannot call multiply() until fillComplete() has been called.");
02393     TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
02394         Teuchos::typeName(*this) << "::multiply(X,Y): X and Y must have the same number of vectors.");
02395 
02396 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02397     int myImageID = Teuchos::rank(*getComm());
02398     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
02399     if (myImageID == 0) {
02400       *out << "Entering CrsMatrix::multiply()" << std::endl
02401                 << "Column Map: " << std::endl;
02402     }
02403     *out << *this->getColMap() << std::endl;
02404     if (myImageID == 0) {
02405       *out << "Initial input: " << std::endl;
02406     }
02407     X.describe(*out,Teuchos::VERB_EXTREME);
02408 #endif
02409 
02410     const size_t numVectors = X.getNumVectors();
02411     // because of Views, it is difficult to determine if X and Y point to the same data. 
02412     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
02413     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
02414     Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal> > importer = graph_->getImporter();
02415     Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal> > exporter = graph_->getExporter();
02416     Teuchos::RCP<const MV> Xcopy;
02417     Teuchos::RCP<MV>       Ycopy;
02418     const Kokkos::MultiVector<Scalar,Node> *lclX = &X.getLocalMV();
02419     Kokkos::MultiVector<Scalar,Node>       *lclY = &Y.getLocalMVNonConst();
02420 
02421     // cannot handle non-constant stride right now
02422     if (X.isConstantStride() == false) {
02423       // generate a copy of X 
02424       Xcopy = Teuchos::rcp(new MV(X));
02425       lclX = &Xcopy->getLocalMV();
02426 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02427       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
02428       Xcopy->describe(*out,Teuchos::VERB_EXTREME);
02429 #endif
02430     }
02431 
02432     // cannot handle non-constant stride right now
02433     if (Y.isConstantStride() == false) {
02434       // generate a copy of Y 
02435       Ycopy = Teuchos::rcp(new MV(Y));
02436       lclY = &Ycopy->getLocalMVNonConst();
02437 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02438       if (myImageID == 0) *out << "Y is not constant stride, duplicating Y results in a strided copy" << std::endl;
02439       Ycopy->describe(*out,Teuchos::VERB_EXTREME);
02440 #endif
02441     }
02442 
02443     // cannot handle in-situ matvec
02444     if (lclX==lclY && importer==null && exporter==null) {
02445       TPETRA_EFFICIENCY_WARNING(true,std::runtime_error,
02446           "::multiply(X,Y): If X and Y are the same, it necessitates a temporary copy of X, which is inefficient.");
02447       // generate a copy of X 
02448       Xcopy = Teuchos::rcp(new MV(X));
02449       lclX = &Xcopy->getLocalMV();
02450 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02451       if (myImageID == 0) *out << "X and Y are co-located, duplicating X results in a strided copy" << std::endl;
02452       Xcopy->describe(*out,Teuchos::VERB_EXTREME);
02453 #endif
02454     }
02455 
02456     if (importer != null) {
02457       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
02458       if (importMV_ == null) {
02459         importMV_ = Teuchos::rcp( new MV(getColMap(),numVectors) );
02460       }
02461     }
02462     if (exporter != null) {
02463       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
02464       if (exportMV_ == null) {
02465         exportMV_ = Teuchos::rcp( new MV(getRowMap(),numVectors) );
02466       }
02467     }
02468 
02469     // import/export patterns are different depending on transpose.
02470     if (mode == Teuchos::NO_TRANS) {
02471       // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
02472       if (importer != null) {
02473         importMV_->doImport(X, *importer, INSERT);
02474 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02475         if (myImageID == 0) {
02476           *out << "Performed import of X using importer..." << std::endl;
02477         }
02478         importMV_->describe(*out,Teuchos::VERB_EXTREME);
02479 #endif
02480         lclX = &importMV_->getLocalMV();
02481       }
02482       // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
02483       // We will compute solution into the to-be-exported MV; get a view
02484       if (exporter != null) {
02485         lclY = &exportMV_->getLocalMVNonConst();
02486       }
02487       // Do actual computation
02488       lclMatVec_.template multiply<Scalar,Scalar>(Teuchos::NO_TRANS, Teuchos::ScalarTraits<Scalar>::one(), *lclX, Teuchos::ScalarTraits<Scalar>::zero(), *lclY);
02489 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02490       if (myImageID == 0) *out << "Matrix-MV product..." << std::endl;
02491       if (exportMV_ != null) {
02492         exportMV_->describe(*out,Teuchos::VERB_EXTREME);
02493       } 
02494       else {
02495         Y.describe(*out,Teuchos::VERB_EXTREME);
02496       }
02497 #endif
02498       // do the export
02499       if (exporter != null) {
02500         // Make sure target is zero: necessary because we are adding. may need adjusting for alpha,beta apply()
02501         Y.putScalar(ST::zero());  
02502         Y.doExport(*exportMV_, *exporter, ADD); // Fill Y with Values from export vector
02503 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02504         if (myImageID == 0) *out << "Output vector after export() using exporter..." << std::endl;
02505         Y.describe(*out,Teuchos::VERB_EXTREME);
02506 #endif
02507       }
02508       // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
02509       if (Y.isDistributed() == false) {
02510         Y.reduce();
02511 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02512         if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
02513         Y.describe(*out,Teuchos::VERB_EXTREME);
02514 #endif
02515       }
02516     }
02517     else {
02518       // mode == CONJ_TRANS or TRANS
02519       TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
02520           Teuchos::typeName(*this) << "::multiply() does not currently support transposed multiplications for complex scalar types.");
02521       // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
02522       if (exporter != null) {
02523         exportMV_->doImport(X,*exporter,INSERT);
02524         lclX = &exportMV_->getLocalMV();
02525 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02526         if (myImageID == 0) {
02527           *out << "Performed import of X using exporter..." << std::endl;
02528         }
02529         exportMV_->describe(*out,Teuchos::VERB_EXTREME);
02530 #endif
02531       }
02532       // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
02533       // We will compute colutioni into the to-be-exported MV; get a view
02534       if (importer != null) {
02535         lclY = &importMV_->getLocalMVNonConst();
02536       }
02537       // Do actual computation
02538       lclMatVec_.template multiply<Scalar,Scalar>(Teuchos::CONJ_TRANS, Teuchos::ScalarTraits<Scalar>::one(), *lclX, Teuchos::ScalarTraits<Scalar>::zero(), *lclY);
02539 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02540       if (myImageID == 0) *out << "Matrix-MV product..." << std::endl;
02541       if (importMV_ != null) {
02542         importMV_->describe(*out,Teuchos::VERB_EXTREME);
02543       } 
02544       else {
02545         Y.describe(*out,Teuchos::VERB_EXTREME);
02546       }
02547 #endif
02548       if (importer != null) {
02549         Y.putScalar(ST::zero()); // Make sure target is zero: necessary because we are adding. may need adjusting for alpha,beta multiply()
02550         Y.doExport(*importMV_,*importer,ADD);
02551 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02552         if (myImageID == 0) *out << "Output vector after export() using importer..." << std::endl;
02553         Y.describe(*out,Teuchos::VERB_EXTREME);
02554 #endif
02555       }
02556       // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
02557       if (Y.isDistributed() == false) {
02558         Y.reduce();
02559 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
02560         if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
02561         Y.describe(*out,Teuchos::VERB_EXTREME);
02562 #endif
02563       }
02564     }
02565   }
02566 
02567 
02570   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02571   std::string CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::description() const {
02572     std::ostringstream oss;
02573     oss << Teuchos::Describable::description();
02574     if (isFillComplete()) {
02575       oss << "{status = fill complete"
02576           << ", global rows = " << getGlobalNumRows()
02577           << ", global cols = " << getGlobalNumCols()
02578           << ", global num entries = " << getGlobalNumEntries()
02579           << "}";
02580     }
02581     else {
02582       oss << "{status = fill not complete"
02583           << ", global rows = " << getGlobalNumRows()
02584           << "}";
02585     }
02586     return oss.str();
02587   }
02588 
02589 
02592   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
02593   void CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const {
02594     using std::endl;
02595     using std::setw;
02596     using Teuchos::VERB_DEFAULT;
02597     using Teuchos::VERB_NONE;
02598     using Teuchos::VERB_LOW;
02599     using Teuchos::VERB_MEDIUM;
02600     using Teuchos::VERB_HIGH;
02601     using Teuchos::VERB_EXTREME;
02602     Teuchos::EVerbosityLevel vl = verbLevel;
02603     if (vl == VERB_DEFAULT) vl = VERB_LOW;
02604     Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm();
02605     const int myImageID = comm->getRank(),
02606               numImages = comm->getSize();
02607     size_t width = 1;
02608     for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
02609       ++width;
02610     }
02611     width = std::max<size_t>(width,11) + 2;
02612     Teuchos::OSTab tab(out);
02613     //    none: print nothing
02614     //     low: print O(1) info from node 0
02615     //  medium: print O(P) info, num entries per node
02616     //    high: print O(N) info, num entries per row
02617     // extreme: print O(NNZ) info: print indices and values
02618     // 
02619     // for medium and higher, print constituent objects at specified verbLevel
02620     if (vl != VERB_NONE) {
02621       if (myImageID == 0) out << this->description() << std::endl; 
02622       // O(1) globals, minus what was already printed by description()
02623       if (isFillComplete() && myImageID == 0) {
02624         out << "Global number of diagonals = " << getGlobalNumDiags() << std::endl;
02625         out << "Global max number of entries = " << getGlobalMaxNumRowEntries() << std::endl;
02626       }
02627       // constituent objects
02628       if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
02629         if (myImageID == 0) out << "\nRow map: " << std::endl;
02630         getRowMap()->describe(out,vl);
02631         if (getColMap() != Teuchos::null) {
02632           if (myImageID == 0) out << "\nColumn map: " << std::endl;
02633           getColMap()->describe(out,vl);
02634         }
02635         if (getDomainMap() != Teuchos::null) {
02636           if (myImageID == 0) out << "\nDomain map: " << std::endl;
02637           getDomainMap()->describe(out,vl);
02638         }
02639         if (getRangeMap() != Teuchos::null) {
02640           if (myImageID == 0) out << "\nRange map: " << std::endl;
02641           getRangeMap()->describe(out,vl);
02642         }
02643       }
02644       // O(P) data
02645       if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
02646         for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
02647           if (myImageID == imageCtr) {
02648             out << "Node ID = " << imageCtr << std::endl
02649                 << "Node number of entries = " << getNodeNumEntries() << std::endl
02650                 << "Node number of diagonals = " << getNodeNumDiags() << std::endl
02651                 << "Node max number of entries = " << getNodeMaxNumRowEntries() << std::endl
02652                 << "Node number of allocated entries = " << graph_->getNodeAllocationSize() << std::endl;
02653           }
02654           comm->barrier();
02655           comm->barrier();
02656           comm->barrier();
02657         }
02658       }
02659       // O(N) and O(NNZ) data
02660       if (vl == VERB_HIGH || vl == VERB_EXTREME) {
02661         for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
02662           if (myImageID == imageCtr) {
02663             out << std::setw(width) << "Node ID"
02664                 << std::setw(width) << "Global Row" 
02665                 << std::setw(width) << "Num Entries";
02666             if (vl == VERB_EXTREME) {
02667               out << std::setw(width) << "(Index,Value)";
02668             }
02669             out << std::endl;
02670             for (size_t r=0; r < getNodeNumRows(); ++r) {
02671               const size_t nE = getNumEntriesInLocalRow(r);
02672               GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
02673               out << std::setw(width) << myImageID 
02674                   << std::setw(width) << gid
02675                   << std::setw(width) << nE;
02676               if (vl == VERB_EXTREME) {
02677                 if (isGloballyIndexed()) {
02678                   Teuchos::ArrayRCP<const GlobalOrdinal> rowinds;
02679                   Teuchos::ArrayRCP<const Scalar> rowvals;
02680                   getGlobalRowView(gid,rowinds,rowvals);
02681                   for (size_t j=0; j < nE; ++j) {
02682                     out << " (" << rowinds[j]
02683                         << ", " << rowvals[j]
02684                         << ") ";
02685                   }
02686                 }
02687                 else if (isLocallyIndexed()) {
02688                   Teuchos::ArrayRCP<const LocalOrdinal> rowinds;
02689                   Teuchos::ArrayRCP<const Scalar> rowvals;
02690                   getLocalRowView(r,rowinds,rowvals);
02691                   for (size_t j=0; j < nE; ++j) {
02692                     out << " (" << getColMap()->getGlobalElement(rowinds[j]) 
02693                         << ", " << rowvals[j]
02694                         << ") ";
02695                   }
02696                 }
02697               }
02698               out << std::endl;
02699             }
02700           }
02701           comm->barrier();
02702           comm->barrier();
02703           comm->barrier();
02704         }
02705       }
02706     }
02707   }
02708 
02709 } // namespace Tpetra
02710 
02711 #endif

Generated on Wed May 12 21:40:14 2010 for Tpetra Matrix/Vector Services by  doxygen 1.4.7