Tpetra_CrsMatrixMultiplyOp_def.hpp

00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
00030 #define TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP
00031 
00032 #include "Tpetra_CrsMatrix.hpp"
00033 
00034 namespace Tpetra {
00035 
00036   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00037   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::CrsMatrixMultiplyOp(const Teuchos::RCP<const CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> > &A) 
00038   : matrix_(A) {
00039     // we don't require that A is fill complete; we will query for the importer/exporter at apply()-time
00040   }
00041 
00042   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00043   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::~CrsMatrixMultiplyOp() {
00044   }
00045 
00046   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00047   void 
00048   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::apply(
00049               const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> &Y_in,
00050               Teuchos::ETransp mode, OpScalar alpha, OpScalar beta) const 
00051   {
00052     TEST_FOR_EXCEPTION(!matrix_->isFillComplete(), std::runtime_error, 
00053         Teuchos::typeName(*this) << "::apply(): underlying matrix is not fill-complete.");
00054     TEST_FOR_EXCEPTION(X_in.getNumVectors() != Y_in.getNumVectors(), std::runtime_error,
00055         Teuchos::typeName(*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
00056     TEST_FOR_EXCEPTION(Teuchos::ScalarTraits<OpScalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
00057         Teuchos::typeName(*this) << "::apply() does not currently support transposed multiplications for complex scalar types.");
00058     if (mode == Teuchos::NO_TRANS) {
00059       applyNonTranspose(X_in, Y_in, alpha, beta);
00060     }
00061     else {
00062       applyTranspose(X_in, Y_in, alpha, beta);
00063     }
00064   }
00065 
00067   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00068   void 
00069   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::applyNonTranspose(
00070       const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
00071             MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in,
00072             OpScalar alpha, OpScalar beta) const 
00073   {
00074     typedef Teuchos::ScalarTraits<OpScalar> ST;
00075     using Teuchos::null;
00076 
00077     const int myImageID = Teuchos::rank(*matrix_->getComm());
00078 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00079     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
00080     if (myImageID == 0) {
00081       *out << "Entering CrsMatrixMultiplyOp::applyNonTranspose()" << std::endl
00082                 << "Column Map: " << std::endl;
00083     }
00084     *out << matrix_->getColMap() << std::endl;
00085     if (myImageID == 0) {
00086       *out << "Initial input: " << std::endl;
00087     }
00088     X_in.describe(*out,Teuchos::VERB_EXTREME);
00089 #endif
00090 
00091     const size_t numVectors = X_in.getNumVectors();
00092     // because of Views, it is difficult to determine if X and Y point to the same data. 
00093     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
00094     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
00095     const Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
00096     const Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
00097     // access X indirectly, in case we need to create temporary storage
00098     Teuchos::RCP<const MV> X;
00099 
00100     // some parameters for below
00101     const bool Y_is_replicated = !Y_in.isDistributed(),
00102                Y_is_overwritten = (beta == ST::zero());
00103     if (Y_is_replicated && myImageID > 0) {
00104       beta = ST::zero();
00105     }
00106 
00107     // currently, cannot multiply from multivector of non-constant stride
00108     if (X_in.isConstantStride() == false && importer == null) {
00109       // generate a strided copy of X_in 
00110       X = Teuchos::rcp(new MV(X_in));
00111 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00112       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
00113       X->describe(*out,Teuchos::VERB_EXTREME);
00114 #endif
00115     }
00116     else {
00117       // just temporary, so this non-owning RCP is okay
00118       X = Teuchos::rcp(&X_in, false);
00119     }
00120 
00121     // set up import/export temporary multivectors
00122     if (importer != null) {
00123       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
00124       if (importMV_ == null) {
00125         importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
00126       }
00127     }
00128     if (exporter != null) {
00129       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
00130       if (exportMV_ == null) {
00131         exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
00132       }
00133     }
00134 
00135     // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00136     if (importer != null) {
00137       importMV_->doImport(X_in, *importer, INSERT);
00138       // multiply out of importMV_
00139       X = importMV_;
00140 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00141       if (myImageID == 0) {
00142         *out << "Performed import of X using importer..." << std::endl;
00143       }
00144       X->describe(*out,Teuchos::VERB_EXTREME);
00145 #endif
00146     }
00147 
00148     // If we have a non-trivial exporter, we must export elements that are permuted or go to other processors
00149     // We will compute solution into the to-be-exported MV
00150     if (exporter != null) {
00151       // Do actual computation
00152       matrix_->template multiply<OpScalar,OpScalar>(*X, *exportMV_, Teuchos::NO_TRANS, alpha, ST::zero());
00153 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00154       if (myImageID == 0) *out << "Export vector after multiply()..." << std::endl;
00155       exportMV_->describe(*out,Teuchos::VERB_EXTREME);
00156 #endif
00157       if (Y_is_overwritten) Y_in.putScalar(ST::zero());
00158       else                  Y_in.scale(beta);
00159       Y_in.doExport(*exportMV_, *exporter, ADD);
00160     }
00161     // otherwise, multiply into Y
00162     else {
00163       // can't multiply in-situ; can't mutiply into non-strided multivector
00164       if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) {
00165         // generate a strided copy of Y 
00166         MV Y(Y_in);
00167         matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::NO_TRANS, alpha, beta);
00168         Y_in = Y;
00169       }
00170       else {
00171         matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::NO_TRANS, alpha, beta);
00172       }
00173     }
00174 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00175     if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl;
00176     Y_in.describe(*out,Teuchos::VERB_EXTREME);
00177 #endif
00178     // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
00179     if (Y_is_replicated) {
00180       Y_in.reduce();
00181 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00182       if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
00183       Y_in.describe(*out,Teuchos::VERB_EXTREME);
00184 #endif
00185     }
00186   }
00187 
00189   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00190   void 
00191   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::applyTranspose(
00192                const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
00193                      MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in,
00194                OpScalar alpha, OpScalar beta) const 
00195   {
00196     typedef Teuchos::ScalarTraits<OpScalar> ST;
00197     using Teuchos::null;
00198 
00199     int myImageID = Teuchos::rank(*matrix_->getComm());
00200 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00201     Teuchos::RCP<Teuchos::FancyOStream> out = Teuchos::VerboseObjectBase::getDefaultOStream();
00202     if (myImageID == 0) {
00203       *out << "Entering CrsMatrixMultiplyOp::applyTranspose()" << std::endl
00204                 << "Column Map: " << std::endl;
00205     }
00206     *out << matrix_->getColMap() << std::endl;
00207     if (myImageID == 0) {
00208       *out << "Initial input: " << std::endl;
00209     }
00210     X_in.describe(*out,Teuchos::VERB_EXTREME);
00211 #endif
00212 
00213     const size_t numVectors = X_in.getNumVectors();
00214     // because of Views, it is difficult to determine if X and Y point to the same data. 
00215     // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
00216     // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
00217     Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
00218     Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
00219     // access X indirectly, in case we need to create temporary storage
00220     Teuchos::RCP<const MV> X;
00221 
00222     // some parameters for below
00223     const bool Y_is_replicated = !Y_in.isDistributed(),
00224                Y_is_overwritten = (beta == ST::zero());
00225     if (Y_is_replicated && myImageID > 0) {
00226       beta = ST::zero();
00227     }
00228 
00229     // currently, cannot multiply from multivector of non-constant stride
00230     if (X_in.isConstantStride() == false && importer==null) {
00231       // generate a strided copy of X_in 
00232       X = Teuchos::rcp(new MV(X_in));
00233 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00234       if (myImageID == 0) *out << "X is not constant stride, duplicating X results in a strided copy" << std::endl;
00235       X->describe(*out,Teuchos::VERB_EXTREME);
00236 #endif
00237     }
00238     else {
00239       // just temporary, so this non-owning RCP is okay
00240       X = Teuchos::rcp(&X_in, false);
00241     }
00242 
00243     // set up import/export temporary multivectors
00244     if (importer != null) {
00245       if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
00246       if (importMV_ == null) {
00247         importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
00248       }
00249     }
00250     if (exporter != null) {
00251       if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
00252       if (exportMV_ == null) {
00253         exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
00254       }
00255     }
00256 
00257     // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
00258     if (exporter != null) {
00259       exportMV_->doImport(X_in,*exporter,INSERT);
00260       // multiply out of exportMV_
00261       X = exportMV_;
00262 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00263       if (myImageID == 0) {
00264         *out << "Performed import of X using exporter..." << std::endl;
00265       }
00266       X->describe(*out,Teuchos::VERB_EXTREME);
00267 #endif
00268     }
00269 
00270     // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
00271     // We will compute colutioni into the to-be-exported MV; get a view
00272     if (importer != null) {
00273       // Do actual computation
00274       matrix_->template multiply<OpScalar,OpScalar>(*X, *importMV_, Teuchos::CONJ_TRANS, alpha, ST::zero());
00275 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00276       if (myImageID == 0) *out << "Import vector after multiply()..." << std::endl;
00277       importMV_->describe(*out,Teuchos::VERB_EXTREME);
00278 #endif
00279       if (Y_is_overwritten) Y_in.putScalar(ST::zero());
00280       else                  Y_in.scale(beta);
00281       Y_in.doExport(*importMV_,*importer,ADD);
00282     }
00283     // otherwise, multiply into Y
00284     else {
00285       // can't multiply in-situ; can't multiply into non-strided multivector
00286       if (Y_in.isConstantStride() == false || X.getRawPtr() == &Y_in) {
00287         // generate a strided copy of Y
00288         MV Y(Y_in);
00289         matrix_->template multiply<OpScalar,OpScalar>(*X, Y, Teuchos::CONJ_TRANS, alpha, beta);
00290         Y_in = Y;
00291       }
00292       else {
00293         matrix_->template multiply<OpScalar,OpScalar>(*X, Y_in, Teuchos::CONJ_TRANS, alpha, beta);
00294       }
00295     }
00296 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00297     if (myImageID == 0) *out << "Y_in vector after multiply/export..." << std::endl;
00298     Y_in.describe(*out,Teuchos::VERB_EXTREME);
00299 #endif
00300     // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
00301     if (Y_is_replicated) {
00302       Y_in.reduce();
00303 #ifdef TPETRA_CRSMATRIX_MULTIPLY_DUMP
00304       if (myImageID == 0) *out << "Output vector is local; result after reduce()..." << std::endl;
00305       Y_in.describe(*out,Teuchos::VERB_EXTREME);
00306 #endif
00307     }
00308   }
00309 
00310   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00311   bool 
00312   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::hasTransposeApply() const {
00313     return true;
00314   }
00315 
00316   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00317   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00318   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getDomainMap() const {
00319     return matrix_->getDomainMap();
00320   }
00321 
00322   template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00323   const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > & 
00324   CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>::getRangeMap() const {
00325     return matrix_->getRangeMap();
00326   }
00327 
00328 }
00329 
00330 template <class OpScalar, class MatScalar, class LocalOrdinal, class GlobalOrdinal, class Node, class LocalMatVec, class LocalMatSolve>
00331 Teuchos::RCP< Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> >
00332 Tpetra::createCrsMatrixMultiplyOp(const Teuchos::RCP<const Tpetra::CrsMatrix<MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve> > &A) {
00333   return rcp(new Tpetra::CrsMatrixMultiplyOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatVec,LocalMatSolve>(A) );
00334 }
00335 
00336 //
00337 // Explicit instantiation macro
00338 //
00339 // Must be expanded from within the Tpetra namespace!
00340 //
00341 
00342 #define TPETRA_CRSMATRIX_MULTIPLYOP_INSTANT(OPSCALAR,MATSCALAR,LO,GO,NODE) \
00343   \
00344   template class CrsMatrixMultiplyOp< OPSCALAR , MATSCALAR , LO , GO , NODE >; \
00345   \
00346   template Teuchos::RCP< CrsMatrixMultiplyOp<OPSCALAR,MATSCALAR,LO,GO,NODE> > \
00347   createCrsMatrixMultiplyOp(const Teuchos::RCP<const CrsMatrix<MATSCALAR,LO,GO,NODE> > &A); \
00348   \
00349   template void CrsMatrix<MATSCALAR,LO,GO,NODE>::multiply<OPSCALAR,OPSCALAR>( \
00350         const MultiVector<OPSCALAR,LO,GO,NODE> &X, \
00351               MultiVector<OPSCALAR,LO,GO,NODE> &Y, \
00352               Teuchos::ETransp mode,               \
00353               OPSCALAR alpha, OPSCALAR beta        \
00354               ) const; \
00355 
00356 #endif // TPETRA_CRSMATRIXMULTIPLYOP_DEF_HPP

Generated on Tue Jul 13 09:39:06 2010 for Tpetra Matrix/Vector Services by  doxygen 1.4.7