Tpetra_CrsMatrixMultiplyOp_def.hpp

Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
00030 #define TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
00031 
00032 #include "Tpetra_CrsMatrix.hpp"
00033 
00034 #ifdef DOXYGEN_USE_ONLY
00035   #include "Tpetra_CrsMatrixMultiplyOp_decl.hpp"
00036 #endif
00037 
00043 namespace Tpetra {
00044 
00045   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00046   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::CrsMatrixMultiplyOp(const Teuchos::RCP<const CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> > &A) 
00047   : matrix_(A) {
00048     // we don't require that A is fill complete; we will query for the importer/exporter at apply()-time
00049 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00050     importTimer_ = Teuchos::TimeMonitor::getNewTimer( "CrsMatrixMultiplyOp::import" );
00051     exportTimer_ = Teuchos::TimeMonitor::getNewTimer( "CrsMatrixMultiplyOp::export" );
00052 #endif
00053   }
00054 
00055   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00056   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::~CrsMatrixMultiplyOp() {
00057   }
00058 
00059   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00060   void 
00061   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
00062               const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
00063                     MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> &Y_in,
00064                     Teuchos::ETransp mode, OpScalar alpha, OpScalar beta) const 
00065   {
00066     TEST_FOR_EXCEPTION(!matrix_->isFillComplete(), std::runtime_error, 
00067         Teuchos::typeName(*this) << "::apply(): underlying matrix is not fill-complete.");
00068     TEST_FOR_EXCEPTION(X_in.getNumVectors() != Y_in.getNumVectors(), std::runtime_error,
00069         Teuchos::typeName(*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
00070     TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<OpScalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
00071         Teuchos::typeName(*this) << "::apply() does not currently support transposed multiplications for complex scalar types.");
00072     if (mode == Teuchos::NO_TRANS) {
00073       applyNonTranspose(X_in, Y_in, alpha, beta);
00074     }
00075     else {
00076       applyTranspose(X_in, Y_in, alpha, beta);
00077     }
00078   }
00079 
00081   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00082   void 
00083   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyNonTranspose(
00084       const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
00085             MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in,
00086             OpScalar alpha, OpScalar beta) const 
00087   {
00088     typedef Teuchos::ScalarTraits<OpScalar> ST;
00089     using Teuchos::null;
00090 
00091     const int myImageID = Teuchos::rank(*matrix_->getComm());
00092 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00093     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
00094     if (myImageID == 0) {
00095       *out << "Entering CrsMatrixMultiplyOp::applyNonTranspose()" << std::endl
00096                 << "Column Map: " << std::endl;
00097     }
00098     *out << matrix_->getColMap() << std::endl;
00099     if (myImageID == 0) {
00100       *out << "Initial input: " << std::endl;
00101     }
00102     X_in.describe(*out,Teuchos::VERB_EXTREME);
00103 #endif
00104 
00105     const size_t numVectors = X_in.getNumVectors();
00106     // because of Views, it is difficult to determine if X and Y point to the same data. 
00107     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
00108     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
00109     const Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
00110     const Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
00111     // access X indirectly, in case we need to create temporary storage
00112     Teuchos::RCP<const MV> X;
00113 
00114     // some parameters for below
00115     const bool Y_is_replicated = !Y_in.isDistributed(),
00116                Y_is_overwritten = (beta == ST::zero());
00117     if (Y_is_replicated && myImageID > 0) {
00118       beta = ST::zero();
00119     }
00120 
00121     // currently, cannot multiply from multivector of non-constant stride
00122     if (X_in.isConstantStride() == false && importer == null) {
00123       // generate a strided copy of X_in 
00124       X = Teuchos::rcp(new MV(X_in));
00125 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00126       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
00127       X->describe(*out,Teuchos::VERB_EXTREME);
00128 #endif
00129     }
00130     else {
00131       // just temporary, so this non-owning RCP is okay
00132       X = Teuchos::rcp(&X_in, false);
00133     }
00134 
00135     // set up import/export temporary multivectors
00136     if (importer != null) {
00137       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
00138       if (importMV_ == null) {
00139         importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
00140       }
00141     }
00142     if (exporter != null) {
00143       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
00144       if (exportMV_ == null) {
00145         exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
00146       }
00147     }
00148 
00149     // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00150     if (importer != null) {
00151       {
00152 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00153         Teuchos::TimeMonitor lcltimer(*importTimer_);
00154 #endif
00155         importMV_->doImport(X_in, *importer, INSERT);
00156       }
00157       // multiply out of importMV_
00158       X = importMV_;
00159 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00160       if (myImageID == 0) {
00161         *out << "Performed import of X using importer..." << std::endl;
00162       }
00163       X->describe(*out,Teuchos::VERB_EXTREME);
00164 #endif
00165     }
00166 
00167     // If we have a non-trivial exporter, we must export elements that are permuted or go to other processors
00168     // We will compute solution into the to-be-exported MV
00169     if (exporter != null) {
00170       // Do actual computation
00171       matrix_->template multiply<OpScalar,OpScalar>(*X, *exportMV_, Teuchos::NO_TRANS, alpha, ST::zero());
00172 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00173       if (myImageID == 0) *out << "Export vector after multiply()..." << std::endl;
00174       exportMV_->describe(*out,Teuchos::VERB_EXTREME);
00175 #endif
00176       if (Y_is_overwritten) Y_in.putScalar(ST::zero());
00177       else                  Y_in.scale(beta);
00178       {
00179 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00180         Teuchos::TimeMonitor lcltimer(*exportTimer_);
00181 #endif
00182         Y_in.doExport(*exportMV_, *exporter, ADD);
00183       }
00184     }
00185     // otherwise, multiply into Y
00186     else {
00187       // can't multiply in-situ; can't mutiply into non-strided multivector
00188       if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) {
00189         // generate a strided copy of Y 
00190         MV Y(Y_in);
00191         matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::NO_TRANS, alpha, beta);
00192         Y_in = Y;
00193       }
00194       else {
00195         matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::NO_TRANS, alpha, beta);
00196       }
00197     }
00198 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00199     if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl;
00200     Y_in.describe(*out,Teuchos::VERB_EXTREME);
00201 #endif
00202     // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
00203     if (Y_is_replicated) {
00204       Y_in.reduce();
00205 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00206       if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
00207       Y_in.describe(*out,Teuchos::VERB_EXTREME);
00208 #endif
00209     }
00210   }
00211 
00213   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00214   void 
00215   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyTranspose(
00216                const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
00217                      MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in,
00218                OpScalar alpha, OpScalar beta) const 
00219   {
00220     typedef Teuchos::ScalarTraits<OpScalar> ST;
00221     using Teuchos::null;
00222 
00223     int myImageID = Teuchos::rank(*matrix_->getComm());
00224 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00225     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
00226     if (myImageID == 0) {
00227       *out << "Entering CrsMatrixMultiplyOp::applyTranspose()" << std::endl
00228                 << "Column Map: " << std::endl;
00229     }
00230     *out << matrix_->getColMap() << std::endl;
00231     if (myImageID == 0) {
00232       *out << "Initial input: " << std::endl;
00233     }
00234     X_in.describe(*out,Teuchos::VERB_EXTREME);
00235 #endif
00236 
00237     const size_t numVectors = X_in.getNumVectors();
00238     // because of Views, it is difficult to determine if X and Y point to the same data. 
00239     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
00240     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
00241     Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
00242     Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
00243     // access X indirectly, in case we need to create temporary storage
00244     Teuchos::RCP<const MV> X;
00245 
00246     // some parameters for below
00247     const bool Y_is_replicated = !Y_in.isDistributed(),
00248                Y_is_overwritten = (beta == ST::zero());
00249     if (Y_is_replicated && myImageID > 0) {
00250       beta = ST::zero();
00251     }
00252 
00253     // currently, cannot multiply from multivector of non-constant stride
00254     if (X_in.isConstantStride() == false && importer==null) {
00255       // generate a strided copy of X_in 
00256       X = Teuchos::rcp(new MV(X_in));
00257 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00258       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
00259       X->describe(*out,Teuchos::VERB_EXTREME);
00260 #endif
00261     }
00262     else {
00263       // just temporary, so this non-owning RCP is okay
00264       X = Teuchos::rcp(&X_in, false);
00265     }
00266 
00267     // set up import/export temporary multivectors
00268     if (importer != null) {
00269       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
00270       if (importMV_ == null) {
00271         importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
00272       }
00273     }
00274     if (exporter != null) {
00275       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
00276       if (exportMV_ == null) {
00277         exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
00278       }
00279     }
00280 
00281     // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
00282     if (exporter != null) {
00283       {
00284 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00285         Teuchos::TimeMonitor lcltimer(*importTimer_);
00286 #endif
00287         exportMV_->doImport(X_in,*exporter,INSERT);
00288       }
00289       // multiply out of exportMV_
00290       X = exportMV_;
00291 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00292       if (myImageID == 0) {
00293         *out << "Performed import of X using exporter..." << std::endl;
00294       }
00295       X->describe(*out,Teuchos::VERB_EXTREME);
00296 #endif
00297     }
00298 
00299 
00300     // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
00301     // We will compute solution into the to-be-exported MV; get a view
00302     if (importer != null) {
00303       // Do actual computation
00304       matrix_->template multiply<OpScalar,OpScalar>(*X, *importMV_, Teuchos::CONJ_TRANS, alpha, ST::zero());
00305 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00306       if (myImageID == 0) *out << "Import vector after multiply()..." << std::endl;
00307       importMV_->describe(*out,Teuchos::VERB_EXTREME);
00308 #endif
00309       if (Y_is_overwritten) Y_in.putScalar(ST::zero());
00310       else                  Y_in.scale(beta);
00311       //
00312       {
00313 #ifdef HAVE_KOKKOS_CUDA_NODE_MEMORY_PROFILING
00314         Teuchos::TimeMonitor lcltimer(*importTimer_);
00315 #endif
00316         Y_in.doExport(*importMV_,*importer,ADD);
00317       }
00318     }
00319     // otherwise, multiply into Y
00320     else {
00321       // can't multiply in-situ; can't multiply into non-strided multivector
00322       if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) {
00323         // generate a strided copy of Y
00324         MV Y(Y_in);
00325         matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::CONJ_TRANS, alpha, beta);
00326         Y_in = Y;
00327       }
00328       else {
00329         matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::CONJ_TRANS, alpha, beta);
00330       }
00331     }
00332 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00333     if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl;
00334     Y_in.describe(*out,Teuchos::VERB_EXTREME);
00335 #endif
00336     // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
00337     if (Y_is_replicated) {
00338       Y_in.reduce();
00339 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00340       if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
00341       Y_in.describe(*out,Teuchos::VERB_EXTREME);
00342 #endif
00343     }
00344   }
00345 
00346   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00347   bool 
00348   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::hasTransposeApply() const {
00349     return true;
00350   }
00351 
00352   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00353   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00354   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getDomainMap() const {
00355     return matrix_->getDomainMap();
00356   }
00357 
00358   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00359   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00360   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::getRangeMap() const {
00361     return matrix_->getRangeMap();
00362   }
00363 
00364 } // Tpetra namespace
00365 
00366 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatOps>
00367 Teuchos::RCP< Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> >
00368 Tpetra::createCrsMatrixMultiplyOp(const Teuchos::RCP<const Tpetra::CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> > &A) {
00369   return Teuchos::rcp(new Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>(A) );
00370 }
00371 
00372 //
00373 // Explicit instantiation macro
00374 //
00375 // Must be expanded from within the Tpetra namespace!
00376 //
00377 
00379 #define TPETRA_CRSMATRIX_MULTIPLYOP_INSTANT(OPSCALAR,MATSCALAR,LO,GO,NODE) \
00380   \
00381   template class CrsMatrixMultiplyOp< OPSCALAR , MATSCALAR , LO , GO , NODE >; \
00382   \
00383   template Teuchos::RCP< Tpetra::CrsMatrixMultiplyOp<OPSCALAR,MATSCALAR,LO,GO,NODE> > \
00384   createCrsMatrixMultiplyOp(const Teuchos::RCP<const Tpetra::CrsMatrix<MATSCALAR,LO,GO,NODE> > &A); \
00385   \
00386   template void CrsMatrix<MATSCALAR,LO,GO,NODE>::multiply<OPSCALAR,OPSCALAR>( \
00387         const MultiVector<OPSCALAR,LO,GO,NODE> &X, \
00388               MultiVector<OPSCALAR,LO,GO,NODE> &Y, \
00389               Teuchos::ETransp mode,               \
00390               OPSCALAR alpha, OPSCALAR beta        \
00391               ) const; \
00392 
00393 #endif // TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Generated on Wed Apr 13 10:21:41 2011 for Tpetra Matrix/Vector Services by  doxygen 1.6.3