Tpetra Matrix/Vector Services Version of the Day
TpetraExt_MatrixMatrix_def.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
00043 #define TPETRA_MATRIXMATRIX_DEF_HPP
00044 
00045 #include "TpetraExt_MatrixMatrix_decl.hpp"
00046 #include "Teuchos_VerboseObject.hpp"
00047 #include "Teuchos_Array.hpp"
00048 #include "Tpetra_Util.hpp"
00049 #include "Tpetra_ConfigDefs.hpp"
00050 #include "Tpetra_CrsMatrix.hpp"
00051 #include "TpetraExt_MMHelpers_def.hpp"
00052 #include "Tpetra_RowMatrixTransposer.hpp"
00053 #include "Tpetra_ConfigDefs.hpp"
00054 #include "Tpetra_Map.hpp"
00055 #include "Tpetra_Export.hpp"
00056 #include "Tpetra_Import_Util.hpp"
00057 #include "Tpetra_Import_Util2.hpp"
00058 #include <algorithm>
00059 #include "Teuchos_FancyOStream.hpp"
00060 
00061 //#define COMPUTE_MMM_STATISTICS
00062 
00063 
00069 namespace Tpetra {
00070 
00071 
00072 namespace MatrixMatrix{
00073 
00074 
00075 template <class Scalar,
00076            class LocalOrdinal,
00077            class GlobalOrdinal,
00078            class Node,
00079            class SpMatOps >
00080 void Multiply(
00081   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00082   bool transposeA,
00083   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00084   bool transposeB,
00085   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& C,
00086   bool call_FillComplete_on_result)
00087 {
00088 #ifdef HAVE_TPETRA_MMM_TIMINGS
00089   using Teuchos::TimeMonitor;
00090   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All Setup")));
00091 #endif
00092 
00093   //TEUCHOS_FUNC_TIME_MONITOR_DIFF("My Matrix Mult", mmm_multiply);
00094   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> Matrix_t;
00095   //
00096   //This method forms the matrix-matrix product C = op(A) * op(B), where
00097   //op(A) == A   if transposeA is false,
00098   //op(A) == A^T if transposeA is true,
00099   //and similarly for op(B).
00100   //
00101 
00102   //A and B should already be Filled.
00103   //(Should we go ahead and call FillComplete() on them if necessary?
00104   // or error out? For now, we choose to error out.)
00105   TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete.");
00106   TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete.");
00107   TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
00108 
00109   //Convience typedefs
00110   typedef CrsMatrixStruct<
00111     Scalar,
00112     LocalOrdinal,
00113     GlobalOrdinal,
00114     Node,
00115     SpMatOps> CrsMatrixStruct_t;
00116   typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t;
00117 
00118   RCP<const Matrix_t > Aprime = null;
00119   RCP<const Matrix_t > Bprime = null;
00120 
00121   // Is this a "clean" matrix
00122   bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed();
00123 
00124   bool use_optimized_ATB=false;
00125   if(transposeA && !transposeB && call_FillComplete_on_result && NewFlag) {
00126     use_optimized_ATB=true;
00127   }
00128 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use.  Remove this later.
00129   use_optimized_ATB=false;
00130 #endif
00131 
00132 
00133   if(!use_optimized_ATB && transposeA) {
00134     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> at (Teuchos::rcpFromRef (A));
00135     Aprime = at.createTranspose();
00136   }
00137   else{
00138     Aprime = rcpFromRef(A);
00139   }
00140 
00141   if(transposeB){
00142     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> bt (Teuchos::rcpFromRef (B));
00143     Bprime=bt.createTranspose();
00144   }
00145   else{
00146     Bprime = rcpFromRef(B);
00147   }
00148 
00149 
00150   //now check size compatibility
00151   global_size_t numACols = A.getDomainMap()->getGlobalNumElements();
00152   global_size_t numBCols = B.getDomainMap()->getGlobalNumElements();
00153   global_size_t Aouter = transposeA ? numACols : A.getGlobalNumRows();
00154   global_size_t Bouter = transposeB ? B.getGlobalNumRows() : numBCols;
00155   global_size_t Ainner = transposeA ? A.getGlobalNumRows() : numACols;
00156   global_size_t Binner = transposeB ? numBCols : B.getGlobalNumRows();
00157   TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00158     "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
00159     "must match for matrix-matrix product. op(A) is "
00160     <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl);
00161 
00162   //The result matrix C must at least have a row-map that reflects the
00163   //correct row-size. Don't check the number of columns because rectangular
00164   //matrices which were constructed with only one map can still end up
00165   //having the correct capacity and dimensions when filled.
00166   TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error,
00167     "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
00168     "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows()
00169      << " rows, should have at least "<<Aouter << std::endl);
00170 
00171   //It doesn't matter whether C is already Filled or not. If it is already
00172   //Filled, it must have space allocated for the positions that will be
00173   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
00174   //we'll error out later when trying to store result values.
00175 
00176   // CGB: However, matrix must be in active-fill
00177   TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false );
00178 
00179   //We're going to need to import remotely-owned sections of A and/or B
00180   //if more than 1 processor is performing this run, depending on the scenario.
00181   int numProcs = A.getComm()->getSize();
00182 
00183   //Declare a couple of structs that will be used to hold views of the data
00184   //of A and B, to be used for fast access during the matrix-multiplication.
00185   CrsMatrixStruct_t Aview;
00186   CrsMatrixStruct_t Bview;
00187 
00188   RCP<const Map_t > targetMap_A = Aprime->getRowMap();
00189   RCP<const Map_t > targetMap_B = Bprime->getRowMap();
00190 
00191 #ifdef HAVE_TPETRA_MMM_TIMINGS
00192   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All I&X")));
00193 #endif
00194 
00195   //Now import any needed remote rows and populate the Aview struct.
00196   // NOTE: We assert that an import isn't needed --- since we do the transpose above to handle that.
00197   if(!use_optimized_ATB) {
00198     RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
00199     MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview,dummyImporter,true);
00200   }
00201 
00202 
00203   //We will also need local access to all rows of B that correspond to the
00204   //column-map of op(A).
00205   if (numProcs > 1) {
00206     targetMap_B = Aprime->getColMap(); //colmap_op_A;
00207   }
00208 
00209   //Now import any needed remote rows and populate the Bview struct.
00210   if(!use_optimized_ATB)
00211     MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter());
00212 
00213 #ifdef HAVE_TPETRA_MMM_TIMINGS
00214   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All Multiply")));
00215 #endif
00216 
00217 
00218   //Now call the appropriate method to perform the actual multiplication.
00219   CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> crsmat(C);
00220 
00221   if(use_optimized_ATB) {
00222     MMdetails::mult_AT_B_newmatrix(A, B, C);
00223   }
00224   else if(call_FillComplete_on_result && NewFlag ) {
00225     MMdetails::mult_A_B_newmatrix(Aview, Bview, C);
00226   }
00227   else {
00228     MMdetails::mult_A_B(Aview, Bview, crsmat);
00229 #ifdef HAVE_TPETRA_MMM_TIMINGS
00230     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM All FillComplete")));
00231 #endif
00232     if (call_FillComplete_on_result) {
00233       //We'll call FillComplete on the C matrix before we exit, and give
00234       //it a domain-map and a range-map.
00235       //The domain-map will be the domain-map of B, unless
00236       //op(B)==transpose(B), in which case the range-map of B will be used.
00237       //The range-map will be the range-map of A, unless
00238       //op(A)==transpose(A), in which case the domain-map of A will be used.
00239       if (!C.isFillComplete()) {
00240         C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
00241       }
00242     }
00243   }
00244 
00245 }
00246 
00247 
00248 template <class Scalar,
00249           class LocalOrdinal,
00250           class GlobalOrdinal,
00251           class Node,
00252           class SpMatOps >
00253 void Jacobi(Scalar omega,
00254             const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
00255             const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00256             const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00257             CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& C,
00258             bool call_FillComplete_on_result)
00259 {
00260 #ifdef HAVE_TPETRA_MMM_TIMINGS
00261   using Teuchos::TimeMonitor;
00262   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All Setup")));
00263 #endif
00264   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> Matrix_t;
00265 
00266   //A and B should already be Filled.
00267   //(Should we go ahead and call FillComplete() on them if necessary?
00268   // or error out? For now, we choose to error out.)
00269   TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix A is not fill complete.");
00270   TEUCHOS_TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error, "MatrixMatrix::Multiply(): Matrix B is not fill complete.");
00271   TEUCHOS_TEST_FOR_EXCEPTION(C.isLocallyIndexed() , std::runtime_error, "MatrixMatrix::Multiply(): Result matrix C must not be locally indexed.");
00272 
00273   //Convience typedefs
00274   typedef CrsMatrixStruct<
00275     Scalar,
00276     LocalOrdinal,
00277     GlobalOrdinal,
00278     Node,
00279     SpMatOps> CrsMatrixStruct_t;
00280   typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t;
00281 
00282   RCP<const Matrix_t > Aprime = rcpFromRef(A);
00283   RCP<const Matrix_t > Bprime = rcpFromRef(B);
00284 
00285   //now check size compatibility
00286   global_size_t numACols = A.getDomainMap()->getGlobalNumElements();
00287   global_size_t numBCols = B.getDomainMap()->getGlobalNumElements();
00288   global_size_t Aouter = A.getGlobalNumRows();
00289   global_size_t Bouter = numBCols;
00290   global_size_t Ainner = numACols;
00291   global_size_t Binner = B.getGlobalNumRows();
00292   TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00293     "MatrixMatrix::Jacobi: ERROR, inner dimensions of op(A) and op(B) "
00294     "must match for matrix-matrix product. op(A) is "
00295     <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl);
00296 
00297   //The result matrix C must at least have a row-map that reflects the
00298   //correct row-size. Don't check the number of columns because rectangular
00299   //matrices which were constructed with only one map can still end up
00300   //having the correct capacity and dimensions when filled.
00301   TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error,
00302     "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
00303     "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows()
00304      << " rows, should have at least "<<Aouter << std::endl);
00305 
00306   //It doesn't matter whether C is already Filled or not. If it is already
00307   //Filled, it must have space allocated for the positions that will be
00308   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
00309   //we'll error out later when trying to store result values.
00310 
00311   // CGB: However, matrix must be in active-fill
00312   TEUCHOS_TEST_FOR_EXCEPT( C.isFillActive() == false );
00313 
00314   //We're going to need to import remotely-owned sections of A and/or B
00315   //if more than 1 processor is performing this run, depending on the scenario.
00316   int numProcs = A.getComm()->getSize();
00317 
00318   //Declare a couple of structs that will be used to hold views of the data
00319   //of A and B, to be used for fast access during the matrix-multiplication.
00320   CrsMatrixStruct_t Aview;
00321   CrsMatrixStruct_t Bview;
00322 
00323   RCP<const Map_t > targetMap_A = Aprime->getRowMap();
00324   RCP<const Map_t > targetMap_B = Bprime->getRowMap();
00325 
00326 #ifdef HAVE_TPETRA_MMM_TIMINGS
00327   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All I&X")));
00328 #endif
00329 
00330   //Now import any needed remote rows and populate the Aview struct.
00331   MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview);
00332 
00333   //We will also need local access to all rows of B that correspond to the
00334   //column-map of op(A).
00335   if (numProcs > 1) {
00336     targetMap_B = Aprime->getColMap(); //colmap_op_A;
00337   }
00338 
00339   //Now import any needed remote rows and populate the Bview struct.
00340   MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter());
00341 
00342 #ifdef HAVE_TPETRA_MMM_TIMINGS
00343   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi All Multiply")));
00344 #endif
00345 
00346 
00347   //Now call the appropriate method to perform the actual multiplication.
00348   CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> crsmat(C);
00349 
00350   // Is this a "clean" matrix
00351   bool NewFlag=!C.getGraph()->isLocallyIndexed() && !C.getGraph()->isGloballyIndexed();
00352 
00353   if(call_FillComplete_on_result && NewFlag ) {
00354     MMdetails::jacobi_A_B_newmatrix(omega,Dinv,Aview, Bview, C);
00355   }
00356   else {
00357     TEUCHOS_TEST_FOR_EXCEPTION(
00358       true, std::runtime_error,
00359       "jacobi_A_B_general not implemented");
00360     // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm
00361     // commenting it out.
00362 // #ifdef HAVE_TPETRA_MMM_TIMINGS
00363 //     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi FillComplete")));
00364 // #endif
00365     // FIXME (mfh 03 Apr 2014) This statement is unreachable, so I'm
00366     // commenting it out.
00367     // if (call_FillComplete_on_result) {
00368     //   //We'll call FillComplete on the C matrix before we exit, and give
00369     //   //it a domain-map and a range-map.
00370     //   //The domain-map will be the domain-map of B, unless
00371     //   //op(B)==transpose(B), in which case the range-map of B will be used.
00372     //   //The range-map will be the range-map of A, unless
00373     //   //op(A)==transpose(A), in which case the domain-map of A will be used.
00374     //   if (!C.isFillComplete()) {
00375     //     C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
00376     //   }
00377     // }
00378   }
00379 }
00380 
00381 
00382 
00383 template <class Scalar,
00384           class LocalOrdinal,
00385           class GlobalOrdinal,
00386           class Node,
00387           class SpMatOps >
00388 void Add(
00389   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00390   bool transposeA,
00391   Scalar scalarA,
00392   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00393   Scalar scalarB )
00394 {
00395   TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00396     "MatrixMatrix::Add ERROR, input matrix A.isFillComplete() is false; it is required to be true. (Result matrix B is not required to be isFillComplete()).");
00397   TEUCHOS_TEST_FOR_EXCEPTION(B.isFillComplete() , std::runtime_error,
00398     "MatrixMatrix::Add ERROR, input matrix B must not be fill complete!");
00399   TEUCHOS_TEST_FOR_EXCEPTION(B.isStaticGraph() , std::runtime_error,
00400     "MatrixMatrix::Add ERROR, input matrix B must not have static graph!");
00401   TEUCHOS_TEST_FOR_EXCEPTION(B.isLocallyIndexed() , std::runtime_error,
00402     "MatrixMatrix::Add ERROR, input matrix B must not be locally indexed!");
00403   TEUCHOS_TEST_FOR_EXCEPTION(B.getProfileType()!=DynamicProfile, std::runtime_error,
00404     "MatrixMatrix::Add ERROR, input matrix B must have a dynamic profile!");
00405   //Convience typedef
00406   typedef CrsMatrix<
00407     Scalar,
00408     LocalOrdinal,
00409     GlobalOrdinal,
00410     Node,
00411     SpMatOps> CrsMatrix_t;
00412   RCP<const CrsMatrix_t> Aprime = null;
00413   if( transposeA ){
00414           RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> theTransposer(Teuchos::rcpFromRef (A));
00415     Aprime = theTransposer.createTranspose();
00416   }
00417   else{
00418     Aprime = rcpFromRef(A);
00419   }
00420   size_t a_numEntries;
00421   Array<GlobalOrdinal> a_inds(A.getNodeMaxNumRowEntries());
00422   Array<Scalar> a_vals(A.getNodeMaxNumRowEntries());
00423   GlobalOrdinal row;
00424 
00425   if(scalarB != ScalarTraits<Scalar>::one()){
00426     B.scale(scalarB);
00427   }
00428 
00429   bool bFilled = B.isFillComplete();
00430   size_t numMyRows = B.getNodeNumRows();
00431   if(scalarA != ScalarTraits<Scalar>::zero()){
00432     for(LocalOrdinal i = 0; (size_t)i < numMyRows; ++i){
00433       row = B.getRowMap()->getGlobalElement(i);
00434       Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
00435       if(scalarA != ScalarTraits<Scalar>::one()){
00436         for(size_t j =0; j<a_numEntries; ++j){
00437           a_vals[j] *= scalarA;
00438         }
00439       }
00440       if(bFilled){
00441         B.sumIntoGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
00442       }
00443       else{
00444         B.insertGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
00445       }
00446 
00447     }
00448   }
00449 }
00450 
00451 
00452 template <class Scalar,
00453           class LocalOrdinal,
00454           class GlobalOrdinal,
00455           class Node,
00456           class SpMatOps >
00457 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> >
00458 add (const Scalar& alpha,
00459      const bool transposeA,
00460      const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00461      const Scalar& beta,
00462      const bool transposeB,
00463      const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00464      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& domainMap,
00465      const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& rangeMap,
00466      const Teuchos::RCP<Teuchos::ParameterList>& params)
00467 {
00468   using Teuchos::RCP;
00469   using Teuchos::rcp;
00470   using Teuchos::rcpFromRef;
00471   using Teuchos::rcp_dynamic_cast;
00472   using Teuchos::rcp_implicit_cast;
00473   typedef RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> row_matrix_type;
00474   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> crs_matrix_type;
00475   typedef RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> transposer_type;
00476 
00477   TEUCHOS_TEST_FOR_EXCEPTION(
00478     ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument,
00479     "Tpetra::MatrixMatrix::add: A and B must both be fill complete.");
00480 
00481 #ifdef HAVE_TPETRA_DEBUG
00482   // The matrices don't have domain or range Maps unless they are fill complete.
00483   if (A.isFillComplete () && B.isFillComplete ()) {
00484     const bool domainMapsSame =
00485       (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) ||
00486       (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) ||
00487       (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ())));
00488     TEUCHOS_TEST_FOR_EXCEPTION(
00489       domainMapsSame, std::invalid_argument,
00490       "Tpetra::MatrixMatrix::add: The domain Maps of Op(A) and Op(B) are not the same.");
00491 
00492     const bool rangeMapsSame =
00493       (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) ||
00494       (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) ||
00495       (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ())));
00496     TEUCHOS_TEST_FOR_EXCEPTION(
00497       rangeMapsSame, std::invalid_argument,
00498       "Tpetra::MatrixMatrix::add: The range Maps of Op(A) and Op(B) are not the same.");
00499   }
00500 #endif // HAVE_TPETRA_DEBUG
00501 
00502   // Form the explicit transpose of A if necessary.
00503   RCP<const crs_matrix_type> Aprime;
00504   if (transposeA) {
00505     transposer_type theTransposer (rcpFromRef (A));
00506     Aprime = theTransposer.createTranspose ();
00507   } else {
00508     Aprime = rcpFromRef (A);
00509   }
00510 
00511 #ifdef HAVE_TPETRA_DEBUG
00512   TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
00513     "Tpetra::MatrixMatrix::Add: Failed to compute Op(A).  "
00514     "Please report this bug to the Tpetra developers.");
00515 #endif // HAVE_TPETRA_DEBUG
00516 
00517   // Form the explicit transpose of B if necessary.
00518   RCP<const crs_matrix_type> Bprime;
00519   if (transposeB) {
00520     transposer_type theTransposer (rcpFromRef (B));
00521     Bprime = theTransposer.createTranspose ();
00522   } else {
00523     Bprime = rcpFromRef (B);
00524   }
00525 
00526 #ifdef HAVE_TPETRA_DEBUG
00527   TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
00528     "Tpetra::MatrixMatrix::Add: Failed to compute Op(B).  "
00529     "Please report this bug to the Tpetra developers.");
00530 
00531   TEUCHOS_TEST_FOR_EXCEPTION(
00532     ! Aprime->isFillComplete () || ! Bprime->isFillComplete (), std::invalid_argument,
00533     "Tpetra::MatrixMatrix::add: Aprime and Bprime must both be fill complete.  "
00534     "Please report this bug to the Tpetra developers.");
00535 #endif // HAVE_TPETRA_DEBUG
00536 
00537 
00538   RCP<row_matrix_type> C =
00539     Bprime->add (alpha, *rcp_implicit_cast<const row_matrix_type> (Aprime),
00540                  beta, domainMap, rangeMap, params);
00541   // FIXME (mfh 09 May 2013) This cast won't work if SpMatOps is not the default.
00542   return rcp_dynamic_cast<crs_matrix_type> (C);
00543 }
00544 
00545 
00546 
00547 
00548 template <class Scalar,
00549           class LocalOrdinal,
00550           class GlobalOrdinal,
00551           class Node,
00552           class SpMatOps>
00553 void Add(
00554   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00555   bool transposeA,
00556   Scalar scalarA,
00557   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00558   bool transposeB,
00559   Scalar scalarB,
00560   RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> > C)
00561 {
00562   using Teuchos::as;
00563   using Teuchos::Array;
00564   using Teuchos::ArrayRCP;
00565   using Teuchos::ArrayView;
00566   using Teuchos::RCP;
00567   using Teuchos::rcp;
00568   using Teuchos::rcp_dynamic_cast;
00569   using Teuchos::rcpFromRef;
00570   using Teuchos::tuple;
00571   using std::endl;
00572   //  typedef typename ArrayView<const Scalar>::size_type size_type;
00573   typedef Teuchos::ScalarTraits<Scalar> STS;
00574   typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
00575   //  typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
00576   //  typedef RowGraph<LocalOrdinal, GlobalOrdinal, Node> row_graph_type;
00577   //  typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node, SpMatOps> crs_graph_type;
00578   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> crs_matrix_type;
00579   typedef RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> transposer_type;
00580 
00581   TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
00582     "Tpetra::MatrixMatrix::Add: The case C == null does not actually work.  "
00583     "Fixing this will require an interface change.");
00584 
00585   TEUCHOS_TEST_FOR_EXCEPTION(
00586     ! A.isFillComplete () || ! B.isFillComplete (), std::invalid_argument,
00587     "Tpetra::MatrixMatrix::Add: Both input matrices must be fill complete "
00588     "before calling this function.");
00589 
00590 #ifdef HAVE_TPETRA_DEBUG
00591   {
00592     const bool domainMapsSame =
00593       (! transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getDomainMap ()))) ||
00594       (! transposeA && transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ()))) ||
00595       (transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ())));
00596     TEUCHOS_TEST_FOR_EXCEPTION(
00597       domainMapsSame, std::invalid_argument,
00598       "Tpetra::MatrixMatrix::Add: The domain Maps of Op(A) and Op(B) are not the same.");
00599 
00600     const bool rangeMapsSame =
00601       (! transposeA && ! transposeB && ! A.getRangeMap ()->isSameAs (* (B.getRangeMap ()))) ||
00602       (! transposeA && transposeB && ! A.getRangeMap ()->isSameAs (* (B.getDomainMap ()))) ||
00603       (transposeA && ! transposeB && ! A.getDomainMap ()->isSameAs (* (B.getRangeMap ())));
00604     TEUCHOS_TEST_FOR_EXCEPTION(
00605       rangeMapsSame, std::invalid_argument,
00606       "Tpetra::MatrixMatrix::Add: The range Maps of Op(A) and Op(B) are not the same.");
00607   }
00608 #endif // HAVE_TPETRA_DEBUG
00609 
00610   // Form the explicit transpose of A if necessary.
00611   RCP<const crs_matrix_type> Aprime;
00612   if (transposeA) {
00613     transposer_type theTransposer (rcpFromRef (A));
00614     Aprime = theTransposer.createTranspose ();
00615   } else {
00616     Aprime = rcpFromRef (A);
00617   }
00618 
00619 #ifdef HAVE_TPETRA_DEBUG
00620   TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
00621     "Tpetra::MatrixMatrix::Add: Failed to compute Op(A).  "
00622     "Please report this bug to the Tpetra developers.");
00623 #endif // HAVE_TPETRA_DEBUG
00624 
00625   // Form the explicit transpose of B if necessary.
00626   RCP<const crs_matrix_type> Bprime;
00627   if (transposeB) {
00628     transposer_type theTransposer (rcpFromRef (B));
00629     Bprime = theTransposer.createTranspose ();
00630   } else {
00631     Bprime = rcpFromRef (B);
00632   }
00633 
00634 #ifdef HAVE_TPETRA_DEBUG
00635   TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
00636     "Tpetra::MatrixMatrix::Add: Failed to compute Op(B).  "
00637     "Please report this bug to the Tpetra developers.");
00638 #endif // HAVE_TPETRA_DEBUG
00639 
00640   // Allocate or zero the entries of the result matrix.
00641   if (! C.is_null ()) {
00642     C->setAllToScalar (STS::zero ());
00643   } else {
00644 #if 0
00645     // If Aprime and Bprime have the same row Map, and if C is null,
00646     // we can optimize construction and fillComplete of C.  For now,
00647     // we just check pointer equality, to avoid the all-reduce in
00648     // isSameAs.  It may be worth that all-reduce to check, however.
00649     //if (Aprime->getRowMap ().getRawPtr () == Bprime->getRowMap ().getRawPtr ()) {
00650     if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
00651       RCP<const map_type> rowMap = Aprime->getRowMap ();
00652 
00653       RCP<const crs_graph_type> A_graph =
00654         rcp_dynamic_cast<const crs_graph_type> (Aprime->getGraph (), true);
00655 #ifdef HAVE_TPETRA_DEBUG
00656       TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
00657         "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null.  "
00658         "Please report this bug to the Tpetra developers.");
00659 #endif // HAVE_TPETRA_DEBUG
00660       RCP<const crs_graph_type> B_graph =
00661         rcp_dynamic_cast<const crs_graph_type> (Bprime->getGraph (), true);
00662 #ifdef HAVE_TPETRA_DEBUG
00663       TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
00664         "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null.  "
00665         "Please report this bug to the Tpetra developers.");
00666 #endif // HAVE_TPETRA_DEBUG
00667       RCP<const map_type> A_colMap = A_graph->getColMap ();
00668 #ifdef HAVE_TPETRA_DEBUG
00669       TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
00670         "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null.  "
00671         "Please report this bug to the Tpetra developers.");
00672 #endif // HAVE_TPETRA_DEBUG
00673       RCP<const map_type> B_colMap = B_graph->getColMap ();
00674 #ifdef HAVE_TPETRA_DEBUG
00675       TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
00676         "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null.  "
00677         "Please report this bug to the Tpetra developers.");
00678       TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
00679         std::logic_error,
00680         "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null.  "
00681         "Please report this bug to the Tpetra developers.");
00682       TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
00683         std::logic_error,
00684         "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null.  "
00685         "Please report this bug to the Tpetra developers.");
00686 #endif // HAVE_TPETRA_DEBUG
00687 
00688       // Compute the (column Map and) Import of the matrix sum.
00689       RCP<const import_type> sumImport =
00690         A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
00691       RCP<const map_type> C_colMap = sumImport->getTargetMap ();
00692 
00693       // First, count the number of entries in each row.  Then, go
00694       // back over the rows again, and compute the actual sum.
00695       // Remember that C may have a different column Map than Aprime
00696       // or Bprime, so its local indices may be different.  That's why
00697       // we have to convert from local to global indices.
00698 
00699       ArrayView<const LocalOrdinal> A_local_ind;
00700       Array<GlobalOrdinal> A_global_ind;
00701       ArrayView<const LocalOrdinal> B_local_ind;
00702       Array<GlobalOrdinal> B_global_ind;
00703 
00704       const size_t localNumRows = rowMap->getNodeNumElements ();
00705       ArrayRCP<size_t> numEntriesPerRow (localNumRows);
00706       // Compute the max number of entries in any row of A+B on this
00707       // process, so that we won't have to resize temporary arrays.
00708       size_t maxNumEntriesPerRow = 0;
00709       for (size_t localRow = 0; localRow < localNumRows; ++localRow) {
00710         // Get view of current row of A_graph, in its local indices.
00711         A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
00712         const size_type A_numEnt = A_local_ind.size ();
00713         if (A_numEnt > A_global_ind.size ()) {
00714           A_global_ind.resize (A_numEnt);
00715         }
00716         // Convert A's local indices to global indices.
00717         for (size_type k = 0; k < A_numEnt; ++k) {
00718           A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
00719         }
00720 
00721         // Get view of current row of B_graph, in its local indices.
00722         B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
00723         const size_type B_numEnt = B_local_ind.size ();
00724         if (B_numEnt > B_global_ind.size ()) {
00725           B_global_ind.resize (B_numEnt);
00726         }
00727         // Convert B's local indices to global indices.
00728         for (size_type k = 0; k < B_numEnt; ++k) {
00729           B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
00730         }
00731 
00732         // Count the number of entries in the merged row of A + B.
00733         const size_t curNumEntriesPerRow =
00734           keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
00735                          B_global_ind.begin (), B_global_ind.end ());
00736         numEntriesPerRow[localRow] = curNumEntriesPerRow;
00737         maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
00738       }
00739 
00740       // Create C, using the sum column Map and number of entries per
00741       // row that we computed above.  Having the exact number of
00742       // entries per row lets us use static profile, making it valid
00743       // to call expertStaticFillComplete.
00744       C = rcp (new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow, StaticProfile));
00745 
00746       // Go back through the rows and actually compute the sum.  We
00747       // don't ever have to resize A_global_ind or B_global_ind below,
00748       // since we've already done it above.
00749       ArrayView<const Scalar> A_val;
00750       ArrayView<const Scalar> B_val;
00751 
00752       Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
00753       Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
00754       Array<Scalar> AplusB_val (maxNumEntriesPerRow);
00755 
00756       for (size_t localRow = 0; localRow < localNumRows; ++localRow) {
00757         // Get view of current row of A, in A's local indices.
00758         Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
00759         // Convert A's local indices to global indices.
00760         for (size_type k = 0; k < A_local_ind.size (); ++k) {
00761           A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
00762         }
00763 
00764         // Get view of current row of B, in B's local indices.
00765         Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
00766         // Convert B's local indices to global indices.
00767         for (size_type k = 0; k < B_local_ind.size (); ++k) {
00768           B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
00769         }
00770 
00771         const size_t curNumEntries = numEntriesPerRow[localRow];
00772         ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
00773         ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
00774         ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
00775 
00776         // Sum the entries in the current row of A plus B.
00777         keyValueMerge (A_global_ind.begin (), A_global_ind.end (),
00778                        A_val.begin (), A_val.end (),
00779                        B_global_ind.begin (), B_global_ind.end (),
00780                        B_val.begin (), B_val.end (),
00781                        C_global_ind.begin (), C_val.begin (),
00782                        std::plus<Scalar> ());
00783         // Convert the sum's global indices into C's local indices.
00784         for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
00785           C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
00786         }
00787         // Give the current row sum to C.
00788         C->replaceLocalValues (localRow, C_local_ind, C_val);
00789       }
00790 
00791       // Use "expert static fill complete" to bypass construction of
00792       // the Import and Export (if applicable) object(s).
00793       RCP<const map_type> domainMap = A_graph->getDomainMap ();
00794       RCP<const map_type> rangeMap = A_graph->getRangeMap ();
00795       C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
00796 
00797       return; // Now we're done!
00798     }
00799     else {
00800       // FIXME (mfh 08 May 2013) When I first looked at this method, I
00801       // noticed that C was being given the row Map of Aprime (the
00802       // possibly transposed version of A).  Is this what we want?
00803       C = rcp (new crs_matrix_type (Aprime->getRowMap (), null));
00804     }
00805 
00806 #else
00807     // FIXME (mfh 08 May 2013) When I first looked at this method, I
00808     // noticed that C was being given the row Map of Aprime (the
00809     // possibly transposed version of A).  Is this what we want?
00810     C = rcp (new crs_matrix_type (Aprime->getRowMap (), 0));
00811 #endif // 0
00812   }
00813 
00814 #ifdef HAVE_TPETRA_DEBUG
00815   TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
00816     "Tpetra::MatrixMatrix::Add: At this point, Aprime is null.  "
00817     "Please report this bug to the Tpetra developers.");
00818   TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
00819     "Tpetra::MatrixMatrix::Add: At this point, Bprime is null.  "
00820     "Please report this bug to the Tpetra developers.");
00821   TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
00822     "Tpetra::MatrixMatrix::Add: At this point, C is null.  "
00823     "Please report this bug to the Tpetra developers.");
00824 #endif // HAVE_TPETRA_DEBUG
00825 
00826   Array<RCP<const crs_matrix_type> > Mat =
00827     tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
00828   Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
00829 
00830   // do a loop over each matrix to add: A reordering might be more efficient
00831   for (int k = 0; k < 2; ++k) {
00832     Array<GlobalOrdinal> Indices;
00833     Array<Scalar> Values;
00834 
00835     // Loop over each locally owned row of the current matrix (either
00836     // Aprime or Bprime), and sum its entries into the corresponding
00837     // row of C.  This works regardless of whether Aprime or Bprime
00838     // has the same row Map as C, because both sumIntoGlobalValues and
00839     // insertGlobalValues allow summing resp. inserting into nonowned
00840     // rows of C.
00841 #ifdef HAVE_TPETRA_DEBUG
00842     TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
00843       "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null.  "
00844       "Please report this bug to the Tpetra developers.");
00845 #endif // HAVE_TPETRA_DEBUG
00846     RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
00847 #ifdef HAVE_TPETRA_DEBUG
00848     TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
00849       "Tpetra::MatrixMatrix::Add: At this point, curRowMap is null.  "
00850       "Please report this bug to the Tpetra developers.");
00851 #endif // HAVE_TPETRA_DEBUG
00852 
00853     const size_t localNumRows = Mat[k]->getNodeNumRows ();
00854     for (size_t i = 0; i < localNumRows; ++i) {
00855       const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
00856       size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
00857       if (numEntries > 0) {
00858         Indices.resize (numEntries);
00859         Values.resize (numEntries);
00860         Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
00861 
00862         if (scalar[k] != STS::one ()) {
00863           for (size_t j = 0; j < numEntries; ++j) {
00864             Values[j] *= scalar[k];
00865           }
00866         }
00867 
00868         if (C->isFillComplete ()) {
00869           C->sumIntoGlobalValues (globalRow, Indices, Values);
00870         } else {
00871           C->insertGlobalValues (globalRow, Indices, Values);
00872         }
00873       }
00874     }
00875   }
00876 }
00877 
00878 
00879 
00880 } //End namespace MatrixMatrix
00881 
00882 namespace MMdetails{
00883 
00884 // Prints MMM-style statistics on communication done with an Import or Export object
00885 template <class TransferType>
00886 void printMultiplicationStatistics(Teuchos::RCP<TransferType > Transfer, const std::string &label){
00887   if(Transfer.is_null()) return;
00888 
00889   const Distributor & Distor                   = Transfer->getDistributor();
00890   Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
00891 
00892   size_t rows_send   = Transfer->getNumExportIDs();
00893   size_t rows_recv   = Transfer->getNumRemoteIDs();
00894 
00895   size_t round1_send = Transfer->getNumExportIDs() * sizeof(size_t);
00896   size_t round1_recv = Transfer->getNumRemoteIDs() * sizeof(size_t);
00897   size_t num_send_neighbors = Distor.getNumSends();
00898   size_t num_recv_neighbors = Distor.getNumReceives();
00899   size_t round2_send, round2_recv;
00900   Distor.getLastDoStatistics(round2_send,round2_recv);
00901     
00902   int myPID    = Comm->getRank();
00903   int NumProcs = Comm->getSize();
00904 
00905   // Processor by processor statistics
00906   //    printf("[%d] %s Statistics: neigh[s/r]=%d/%d rows[s/r]=%d/%d r1bytes[s/r]=%d/%d r2bytes[s/r]=%d/%d\n",
00907   //  myPID,label.c_str(),num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv);
00908 
00909   // Global statistics
00910   size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
00911   size_t gstats_min[8], gstats_max[8];
00912   
00913   double lstats_avg[8], gstats_avg[8];
00914   for(int i=0; i<8; i++)
00915     lstats_avg[i] = ((double)lstats[i])/NumProcs;
00916 
00917   Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
00918   Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
00919   Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
00920   
00921   if(!myPID) {
00922     printf("%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
00923      (int)gstats_min[0],gstats_avg[0],(int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(int)gstats_max[2],
00924      (int)gstats_min[4],gstats_avg[4],(int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(int)gstats_max[6]);
00925     printf("%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n",label.c_str(),
00926      (int)gstats_min[1],gstats_avg[1],(int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(int)gstats_max[3],
00927      (int)gstats_min[5],gstats_avg[5],(int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(int)gstats_max[7]);
00928   }
00929 }
00930 
00931 
00932 //kernel method for computing the local portion of C = A*B
00933 template<class Scalar,
00934          class LocalOrdinal,
00935          class GlobalOrdinal,
00936          class Node,
00937          class SpMatOps>
00938 void mult_AT_B_newmatrix(
00939   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00940   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00941   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& C) {
00942 
00943   // Using &  Typedefs
00944   using Teuchos::RCP;
00945   using Teuchos::rcp;
00946   typedef CrsMatrixStruct<
00947     Scalar,
00948     LocalOrdinal,
00949     GlobalOrdinal,
00950     Node,
00951     SpMatOps> CrsMatrixStruct_t; 
00952 
00953 #ifdef HAVE_TPETRA_MMM_TIMINGS
00954   using Teuchos::TimeMonitor;
00955   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T Transpose")));
00956 #endif
00957 
00958   /*************************************************************/
00959   /* 1) Local Transpose of A                                   */
00960   /*************************************************************/
00961   RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> at (Teuchos::rcpFromRef (A));
00962   RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> > Atrans = at.createTransposeLocal();
00963 
00964   /*************************************************************/
00965   /* 2/3) Call mult_A_B_newmatrix w/ fillComplete              */
00966   /*************************************************************/
00967 #ifdef HAVE_TPETRA_MMM_TIMINGS
00968   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T I&X")));
00969 #endif
00970 
00971   // Get views, asserting that no import is required to speed up computation
00972   CrsMatrixStruct_t Aview;
00973   CrsMatrixStruct_t Bview;
00974   RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
00975   MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,true);
00976   MMdetails::import_and_extract_views(B, B.getRowMap(), Bview, dummyImporter,true);
00977 
00978 #ifdef HAVE_TPETRA_MMM_TIMINGS
00979   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T AB-core")));
00980 #endif
00981 
00982   RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> >Ctemp;
00983 
00984   // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
00985   bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
00986   if(needs_final_export)
00987     Ctemp = rcp(new Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>(Atrans->getRowMap(),0));
00988   else
00989     Ctemp = rcp(&C,false);// don't allow deallocation
00990 
00991   // Multiply
00992   mult_A_B_newmatrix(Aview,Bview,*Ctemp);
00993 
00994   /*************************************************************/
00995   /* 4) exportAndFillComplete matrix                           */
00996   /*************************************************************/
00997 #ifdef HAVE_TPETRA_MMM_TIMINGS
00998   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM-T exportAndFillComplete")));
00999 #endif
01000   
01001   Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> > Crcp(&C,false);
01002   if(needs_final_export) 
01003     Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
01004          B.getDomainMap(),A.getDomainMap());
01005   
01006 #ifdef COMPUTE_MMM_STATISTICS
01007   printMultiplicationStatistics(Ctemp->getGraph()->getExporter(),std::string("AT_B MMM"));
01008 #endif
01009 }
01010 
01011 
01012 //kernel method for computing the local portion of C = A*B
01013 template<class Scalar,
01014          class LocalOrdinal,
01015          class GlobalOrdinal,
01016          class Node,
01017          class SpMatOps>
01018 void mult_A_B(
01019   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Aview,
01020   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Bview,
01021   CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C)
01022 {
01023   typedef Teuchos::ScalarTraits<Scalar> STS;
01024   //TEUCHOS_FUNC_TIME_MONITOR_DIFF("mult_A_B", mult_A_B);
01025   LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
01026   LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
01027 
01028   LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
01029   LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
01030 
01031   ArrayView<const GlobalOrdinal> bcols = Bview.colMap->getNodeElementList();
01032   ArrayView<const GlobalOrdinal> bcols_import = null;
01033   if (Bview.importColMap != null) {
01034     C_firstCol_import = Bview.importColMap->getMinLocalIndex();
01035     C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
01036 
01037     bcols_import = Bview.importColMap->getNodeElementList();
01038   }
01039 
01040   size_t C_numCols = C_lastCol - C_firstCol +
01041                         OrdinalTraits<LocalOrdinal>::one();
01042   size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
01043                                 OrdinalTraits<LocalOrdinal>::one();
01044 
01045   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
01046 
01047   Array<Scalar> dwork = Array<Scalar>(C_numCols);
01048   Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
01049   Array<size_t> iwork2 = Array<size_t>(C_numCols);
01050 
01051   Array<Scalar> C_row_i = dwork;
01052   Array<GlobalOrdinal> C_cols = iwork;
01053   Array<size_t> c_index = iwork2;
01054   Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
01055   Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
01056 
01057   size_t C_row_i_length, j, k, last_index;
01058 
01059   // Run through all the hash table lookups once and for all
01060   LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
01061   Array<LocalOrdinal> Acol2Brow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01062   Array<LocalOrdinal> Acol2Irow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01063   if(Aview.colMap->isSameAs(*Bview.origMatrix->getRowMap())){
01064     // Maps are the same: Use local IDs as the hash
01065     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
01066             Aview.colMap->getMaxLocalIndex(); i++)
01067       Acol2Brow[i]=i;
01068   }
01069   else {
01070     // Maps are not the same:  Use the map's hash
01071     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <=
01072     Aview.colMap->getMaxLocalIndex(); i++) {
01073       GlobalOrdinal GID = Aview.colMap->getGlobalElement(i);
01074       LocalOrdinal BLID = Bview.origMatrix->getRowMap()->getLocalElement(GID);
01075       if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
01076       else Acol2Irow[i] = Bview.importMatrix->getRowMap()->getLocalElement(GID);
01077     }    
01078   }
01079 
01080   //To form C = A*B we're going to execute this expression:
01081   //
01082   // C(i,j) = sum_k( A(i,k)*B(k,j) )
01083   //
01084   //Our goal, of course, is to navigate the data in A and B once, without
01085   //performing searches for column-indices, etc.
01086   ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
01087   ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
01088   ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
01089   ArrayView<const size_t> Arowptr, Browptr, Irowptr;
01090   ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
01091   ArrayView<const Scalar> Avals, Bvals, Ivals;
01092   Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
01093   Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
01094   Arowptr = Arowptr_RCP();  Acolind = Acolind_RCP();  Avals = Avals_RCP();
01095   Browptr = Browptr_RCP();  Bcolind = Bcolind_RCP();  Bvals = Bvals_RCP();
01096   if(!Bview.importMatrix.is_null()) {
01097     Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
01098     Irowptr = Irowptr_RCP();  Icolind = Icolind_RCP();  Ivals = Ivals_RCP();
01099   }
01100   
01101   bool C_filled = C.isFillComplete();
01102 
01103   for (size_t i = 0; i < C_numCols; i++)
01104       c_index[i] = OrdinalTraits<size_t>::invalid();
01105 
01106   //loop over the rows of A.
01107   size_t Arows = Aview.rowMap->getNodeNumElements();
01108   for(size_t i=0; i<Arows; ++i) {
01109 
01110     //only navigate the local portion of Aview... which is, thankfully, all of A
01111     //since this routine doesn't do transpose modes
01112     GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
01113 
01114     //loop across the i-th row of A and for each corresponding row
01115     //in B, loop across colums and accumulate product
01116     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
01117     //as we stride across B(k,:) we're calculating updates for row i of the
01118     //result matrix C.
01119 
01120 
01121     C_row_i_length = OrdinalTraits<size_t>::zero();
01122 
01123     for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
01124       LocalOrdinal Ak = Acol2Brow[Acolind[k]];
01125       Scalar Aval = Avals[k];
01126       if (Aval == STS::zero())
01127         continue;
01128 
01129       if (Ak==LO_INVALID) continue;
01130 
01131       for(j=Browptr[Ak]; j< Browptr[Ak+1]; ++j) {
01132           LocalOrdinal col = Bcolind[j];
01133           //assert(col >= 0 && col < C_numCols);
01134           if (c_index[col] == OrdinalTraits<size_t>::invalid()){
01135           //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols);
01136             // This has to be a +=  so insertGlobalValue goes out
01137             C_row_i[C_row_i_length] = Aval*Bvals[j];
01138             C_cols[C_row_i_length] = col;
01139             c_index[col] = C_row_i_length;
01140             C_row_i_length++;
01141           }
01142           else {
01143             C_row_i[c_index[col]] += Aval*Bvals[j];
01144           }
01145         }
01146     }
01147 
01148     for (size_t ii = 0; ii < C_row_i_length; ii++) {
01149       c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
01150       C_cols[ii] = bcols[C_cols[ii]];
01151       combined_index[ii] = C_cols[ii];
01152       combined_values[ii] = C_row_i[ii];
01153     }
01154     last_index = C_row_i_length;
01155 
01156     //
01157     //Now put the C_row_i values into C.
01158     //
01159     // We might have to revamp this later.
01160     C_row_i_length = OrdinalTraits<size_t>::zero();
01161 
01162     for(k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
01163       LocalOrdinal Ak = Acol2Brow[Acolind[k]];
01164       Scalar Aval = Avals[k];
01165       if (Aval == STS::zero())
01166         continue;
01167 
01168       if (Ak!=LO_INVALID) continue;
01169 
01170       Ak = Acol2Irow[Acolind[k]];
01171       for(j=Irowptr[Ak]; j< Irowptr[Ak+1]; ++j) {
01172           LocalOrdinal col = Icolind[j];
01173           //assert(col >= 0 && col < C_numCols);
01174           if (c_index[col] == OrdinalTraits<size_t>::invalid()){
01175           //assert(C_row_i_length >= 0 && C_row_i_length < C_numCols);
01176             // This has to be a +=  so insertGlobalValue goes out
01177             C_row_i[C_row_i_length] = Aval*Ivals[j];
01178             C_cols[C_row_i_length] = col;
01179             c_index[col] = C_row_i_length;
01180             C_row_i_length++;
01181             }
01182             else {
01183               // This has to be a +=  so insertGlobalValue goes out
01184               C_row_i[c_index[col]] += Aval*Ivals[j];
01185             }
01186         }
01187     }
01188 
01189     for (size_t ii = 0; ii < C_row_i_length; ii++) {
01190       c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
01191       C_cols[ii] = bcols_import[C_cols[ii]];
01192       combined_index[last_index] = C_cols[ii];
01193       combined_values[last_index] = C_row_i[ii];
01194       last_index++;
01195     }
01196 
01197       //
01198       //Now put the C_row_i values into C.
01199       //
01200       // We might have to revamp this later.
01201     C_filled ?
01202       C.sumIntoGlobalValues(
01203           global_row,
01204           combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
01205           combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
01206       :
01207       C.insertGlobalValues(
01208           global_row,
01209           combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
01210           combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
01211 
01212   }
01213 
01214 }
01215 
01216 template<class Scalar,
01217          class LocalOrdinal,
01218          class GlobalOrdinal,
01219          class Node,
01220          class SpMatOps>
01221 void setMaxNumEntriesPerRow(
01222   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Mview)
01223 {
01224   typedef typename Array<ArrayView<const LocalOrdinal> >::size_type  local_length_size;
01225   Mview.maxNumRowEntries = OrdinalTraits<local_length_size>::zero();
01226   if(Mview.indices.size() > OrdinalTraits<local_length_size>::zero() ){
01227     Mview.maxNumRowEntries = Mview.indices[0].size();
01228     for(local_length_size i = 1; i<Mview.indices.size(); ++i){
01229       if(Mview.indices[i].size() > Mview.maxNumRowEntries){
01230         Mview.maxNumRowEntries = Mview.indices[i].size();
01231       }
01232     }
01233   }
01234 }
01235 
01236 
01237 template<class CrsMatrixType>
01238 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
01239   // Follows the NZ estimate in ML's ml_matmatmult.c
01240   size_t Aest = 100, Best=100;
01241   if(A.getNodeNumEntries() > 0)
01242     Aest = (A.getNodeNumRows()>0)? A.getNodeNumEntries()/A.getNodeNumEntries():100;
01243   if(B.getNodeNumEntries() > 0)
01244     Best=(B.getNodeNumRows()>0)? B.getNodeNumEntries()/B.getNodeNumEntries():100;
01245 
01246   size_t nnzperrow=(size_t)(sqrt((double)Aest) + sqrt((double)Best) - 1);
01247   nnzperrow*=nnzperrow;
01248 
01249   return (size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
01250 }
01251 
01252 
01253 
01254 //kernel method for computing the local portion of C = A*B
01255 template<class Scalar,
01256          class LocalOrdinal,
01257          class GlobalOrdinal,
01258          class Node,
01259          class SpMatOps>
01260 void mult_A_B_newmatrix(
01261   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Aview,
01262   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Bview,
01263   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C)
01264 {
01265   using Teuchos::RCP;
01266   using Teuchos::rcp;
01267   using Teuchos::ArrayView;
01268   typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
01269   typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
01270 
01271 #ifdef HAVE_TPETRA_MMM_TIMINGS
01272   using Teuchos::TimeMonitor;
01273   RCP<TimeMonitor> MM =
01274     rcp (new TimeMonitor (* (TimeMonitor::getNewTimer ("TpetraExt: MMM M5 Cmap"))));
01275 #endif
01276   size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
01277   LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
01278 
01279 
01280   // Build the final importer / column map, hash table lookups for C
01281   RCP<const import_type> Cimport;
01282   RCP<const map_type> Ccolmap;
01283   RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
01284   RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null :  Bview.importMatrix->getGraph()->getImporter();
01285   Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
01286 
01287   if(Bview.importMatrix.is_null()) {
01288     Cimport = Bimport;
01289     Ccolmap = Bview.colMap;
01290     // Bcol2Ccol is trivial
01291     for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) {
01292       Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
01293     }
01294   }
01295   else {
01296     // Choose the right variant of setUnion
01297     if(!Bimport.is_null() && !Iimport.is_null()){
01298       Cimport = Bimport->setUnion(*Iimport);
01299       Ccolmap = Cimport->getTargetMap();
01300     }
01301     else if(!Bimport.is_null() && Iimport.is_null()) {
01302       Cimport = Bimport->setUnion();
01303     }
01304     else if(Bimport.is_null() && !Iimport.is_null()) {
01305       Cimport = Iimport->setUnion();
01306     }
01307     else
01308       throw std::runtime_error("TpetraExt::MMM status of matrix importers is nonsensical");
01309 
01310     Ccolmap = Cimport->getTargetMap();
01311 
01312     if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()))
01313       throw std::runtime_error("Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
01314 
01315     // NOTE: This is not efficient and should be folded into setUnion
01316     Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
01317     ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
01318     ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
01319 
01320     for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
01321       Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
01322     for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
01323       Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
01324   }
01325 
01326 #ifdef HAVE_TPETRA_MMM_TIMINGS
01327   MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix SerialCore")));
01328 #endif
01329 
01330   // Sizes
01331   size_t m=Aview.origMatrix->getNodeNumRows();
01332   size_t n=Ccolmap->getNodeNumElements();
01333 
01334   // Get Data Pointers
01335   ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
01336   ArrayRCP<size_t> Crowptr_RCP;
01337   ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
01338   ArrayRCP<LocalOrdinal> Ccolind_RCP;
01339   ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
01340   ArrayRCP<Scalar> Cvals_RCP;
01341 
01342   Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
01343   Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
01344   if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
01345 
01346 
01347   // For efficiency
01348   ArrayView<const size_t> Arowptr, Browptr, Irowptr;
01349   ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
01350   ArrayView<const Scalar> Avals, Bvals, Ivals;
01351   ArrayView<size_t> Crowptr;
01352   ArrayView<LocalOrdinal> Ccolind;
01353   ArrayView<Scalar> Cvals;
01354   Arowptr = Arowptr_RCP();  Acolind = Acolind_RCP();  Avals = Avals_RCP();
01355   Browptr = Browptr_RCP();  Bcolind = Bcolind_RCP();  Bvals = Bvals_RCP();
01356   if(!Bview.importMatrix.is_null()) {
01357     Irowptr = Irowptr_RCP();  Icolind = Icolind_RCP();  Ivals = Ivals_RCP();
01358   }
01359 
01360   // The status array will contain the index into colind where this entry was last deposited.
01361   // c_status[i] < CSR_ip - not in the row yet.
01362   // c_status[i] >= CSR_ip, this is the entry where you can find the data
01363   // We start with this filled with INVALID's indicating that there are no entries yet.
01364   // Sadly, this complicates the code due to the fact that size_t's are unsigned.
01365   size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
01366   Array<size_t> c_status(n, ST_INVALID);
01367 
01368   // Classic csr assembly (low memory edition)
01369   size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n);
01370   size_t CSR_ip=0,OLD_ip=0;
01371   Crowptr_RCP.resize(m+1);       Crowptr = Crowptr_RCP();
01372   Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
01373   Cvals_RCP.resize(CSR_alloc);   Cvals   = Cvals_RCP();
01374 
01375   // Run through all the hash table lookups once and for all
01376   Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01377   Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01378 
01379   if(Aview.colMap->isSameAs(*Bview.rowMap)){
01380     // Maps are the same: Use local IDs as the hash
01381     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
01382       LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01383       if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
01384       else {
01385         LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01386         targetMapToImportRow[i] = I_LID;
01387       }
01388     }
01389   }
01390   else {
01391     // Maps are not the same:  Use the map's hash
01392     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
01393       LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01394       if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
01395       else {
01396         LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01397         targetMapToImportRow[i] = I_LID;
01398       }
01399     }
01400   }
01401 
01402   const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
01403 
01404   // For each row of A/C
01405   for(size_t i=0; i<m; i++){
01406     Crowptr[i]=CSR_ip;
01407 
01408     for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
01409       LocalOrdinal Ak      = Acolind[k];
01410       Scalar       Aval    = Avals[k];
01411       if(Aval==SC_ZERO) continue;
01412 
01413       if(targetMapToOrigRow[Ak] != LO_INVALID){
01414         // Local matrix
01415         size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
01416 
01417         for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
01418           LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
01419 
01420           if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
01421             // New entry
01422             c_status[Cj]      = CSR_ip;
01423             Ccolind[CSR_ip]= Cj;
01424             Cvals[CSR_ip]  = Aval*Bvals[j];
01425             CSR_ip++;
01426           }
01427           else
01428             Cvals[c_status[Cj]]+=Aval*Bvals[j];
01429         }
01430       }
01431       else{
01432         // Remote matrix
01433         size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
01434         for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
01435           LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
01436 
01437           if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
01438             // New entry
01439             c_status[Cj]=CSR_ip;
01440             Ccolind[CSR_ip]=Cj;
01441             Cvals[CSR_ip]=Aval*Ivals[j];
01442             CSR_ip++;
01443           }
01444           else
01445             Cvals[c_status[Cj]]+=Aval*Ivals[j];
01446         }
01447       }
01448     }
01449 
01450     // Resize for next pass if needed
01451     if(CSR_ip + n > CSR_alloc){
01452       CSR_alloc*=2;
01453       Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
01454       Cvals_RCP.resize(CSR_alloc);   Cvals   = Cvals_RCP();
01455     }
01456     OLD_ip=CSR_ip;
01457   }
01458 
01459   Crowptr[m]=CSR_ip;
01460 
01461   // Downward resize
01462   Cvals_RCP.resize(CSR_ip);
01463   Ccolind_RCP.resize(CSR_ip);
01464 
01465 
01466 #ifdef HAVE_TPETRA_MMM_TIMINGS
01467   MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix Final Sort"))));
01468 #endif
01469 
01470   // Replace the column map
01471   C.replaceColMap(Ccolmap);
01472 
01473   // Final sort & set of CRS arrays
01474   Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
01475   C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP);
01476 
01477 #ifdef HAVE_TPETRA_MMM_TIMINGS
01478   MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: MMM Newmatrix ESFC"))));
01479 #endif
01480 
01481   // Final FillComplete
01482   C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport);
01483 }
01484 
01485 
01486 //kernel method for computing the local portion of C = (I-omega D^{-1} A)*B
01487 template<class Scalar,
01488          class LocalOrdinal,
01489          class GlobalOrdinal,
01490          class Node,
01491          class SpMatOps>
01492 void jacobi_A_B_newmatrix(
01493   Scalar omega,
01494   const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> & Dinv,
01495   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Aview,
01496   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Bview,
01497   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C)
01498 {
01499   using Teuchos::RCP;
01500   using Teuchos::rcp;
01501   using Teuchos::ArrayView;
01502   typedef Import<LocalOrdinal, GlobalOrdinal, Node> import_type;
01503   typedef Map<LocalOrdinal, GlobalOrdinal, Node> map_type;
01504 
01505 #ifdef HAVE_TPETRA_MMM_TIMINGS
01506   using Teuchos::TimeMonitor;
01507   RCP<TimeMonitor> MM =
01508     rcp (new TimeMonitor (* (TimeMonitor::getNewTimer ("TpetraExt: Jacobi M5 Cmap"))));
01509 #endif
01510   size_t ST_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
01511   LocalOrdinal LO_INVALID = Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
01512 
01513 
01514   // Build the final importer / column map, hash table lookups for C
01515   RCP<const import_type> Cimport;
01516   RCP<const map_type> Ccolmap;
01517   RCP<const import_type> Bimport = Bview.origMatrix->getGraph()->getImporter();
01518   RCP<const import_type> Iimport = Bview.importMatrix.is_null() ? Teuchos::null :  Bview.importMatrix->getGraph()->getImporter();
01519   Array<LocalOrdinal> Bcol2Ccol(Bview.colMap->getNodeNumElements()), Icol2Ccol;
01520 
01521   if(Bview.importMatrix.is_null()) {
01522     Cimport = Bimport;
01523     Ccolmap = Bview.colMap;
01524     // Bcol2Ccol is trivial
01525     for(size_t i=0; i<Bview.colMap->getNodeNumElements(); i++) {
01526       Bcol2Ccol[i] = Teuchos::as<LocalOrdinal>(i);
01527     }
01528   }
01529   else {
01530     // Choose the right variant of setUnion
01531     if(!Bimport.is_null() && !Iimport.is_null()){
01532       Cimport = Bimport->setUnion(*Iimport);
01533       Ccolmap = Cimport->getTargetMap();
01534     }
01535     else if(!Bimport.is_null() && Iimport.is_null()) {
01536       Cimport = Bimport->setUnion();
01537     }
01538     else if(Bimport.is_null() && !Iimport.is_null()) {
01539       Cimport = Iimport->setUnion();
01540     }
01541     else
01542       throw std::runtime_error("TpetraExt::Jacobi status of matrix importers is nonsensical");
01543 
01544     Ccolmap = Cimport->getTargetMap();
01545 
01546     if(!Cimport->getSourceMap()->isSameAs(*Bview.origMatrix->getDomainMap()))
01547       throw std::runtime_error("Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
01548 
01549     // NOTE: This is not efficient and should be folded into setUnion
01550     Icol2Ccol.resize(Bview.importMatrix->getColMap()->getNodeNumElements());
01551     ArrayView<const GlobalOrdinal> Bgid = Bview.origMatrix->getColMap()->getNodeElementList();
01552     ArrayView<const GlobalOrdinal> Igid = Bview.importMatrix->getColMap()->getNodeElementList();
01553 
01554     for(size_t i=0; i<Bview.origMatrix->getColMap()->getNodeNumElements(); i++)
01555       Bcol2Ccol[i] = Ccolmap->getLocalElement(Bgid[i]);
01556     for(size_t i=0; i<Bview.importMatrix->getColMap()->getNodeNumElements(); i++)
01557       Icol2Ccol[i] = Ccolmap->getLocalElement(Igid[i]);
01558   }
01559 
01560 #ifdef HAVE_TPETRA_MMM_TIMINGS
01561   MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix SerialCore")));
01562 #endif
01563 
01564   // Sizes
01565   size_t m=Aview.origMatrix->getNodeNumRows();
01566   size_t n=Ccolmap->getNodeNumElements();
01567 
01568   // Get Data Pointers
01569   ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
01570   ArrayRCP<size_t> Crowptr_RCP;
01571   ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
01572   ArrayRCP<LocalOrdinal> Ccolind_RCP;
01573   ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
01574   ArrayRCP<Scalar> Cvals_RCP;
01575   ArrayRCP<const Scalar> Dvals_RCP;
01576 
01577   Aview.origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
01578   Bview.origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
01579   if(!Bview.importMatrix.is_null()) Bview.importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
01580   Dvals_RCP = Dinv.getData();
01581 
01582   // For efficiency
01583   ArrayView<const size_t> Arowptr, Browptr, Irowptr;
01584   ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
01585   ArrayView<const Scalar> Avals, Bvals, Ivals;
01586   ArrayView<size_t> Crowptr;
01587   ArrayView<LocalOrdinal> Ccolind;
01588   ArrayView<Scalar> Cvals;
01589   ArrayView<const Scalar> Dvals;
01590   Arowptr = Arowptr_RCP();  Acolind = Acolind_RCP();  Avals = Avals_RCP();
01591   Browptr = Browptr_RCP();  Bcolind = Bcolind_RCP();  Bvals = Bvals_RCP();
01592   if(!Bview.importMatrix.is_null()) {
01593     Irowptr = Irowptr_RCP();  Icolind = Icolind_RCP();  Ivals = Ivals_RCP();
01594   }
01595   Dvals = Dvals_RCP();
01596 
01597   // The status array will contain the index into colind where this entry was last deposited.
01598   // c_status[i] < CSR_ip - not in the row yet.
01599   // c_status[i] >= CSR_ip, this is the entry where you can find the data
01600   // We start with this filled with INVALID's indicating that there are no entries yet.
01601   // Sadly, this complicates the code due to the fact that size_t's are unsigned.
01602   size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
01603   Array<size_t> c_status(n, ST_INVALID);
01604 
01605   // Classic csr assembly (low memory edition)
01606   size_t CSR_alloc=std::max(C_estimate_nnz(*Aview.origMatrix,*Bview.origMatrix),n);
01607   size_t CSR_ip=0,OLD_ip=0;
01608   Crowptr_RCP.resize(m+1);       Crowptr = Crowptr_RCP();
01609   Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
01610   Cvals_RCP.resize(CSR_alloc);   Cvals   = Cvals_RCP();
01611 
01612   // Run through all the hash table lookups once and for all
01613   Array<LocalOrdinal> targetMapToOrigRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01614   Array<LocalOrdinal> targetMapToImportRow(Aview.colMap->getNodeNumElements(),LO_INVALID);
01615 
01616   if(Aview.colMap->isSameAs(*Bview.rowMap)){
01617     // Maps are the same: Use local IDs as the hash
01618     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
01619       LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01620       if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
01621       else {
01622         LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01623         targetMapToImportRow[i] = I_LID;
01624       }
01625     }
01626   }
01627   else {
01628     // Maps are not the same:  Use the map's hash
01629     for(LocalOrdinal i=Aview.colMap->getMinLocalIndex(); i <= Aview.colMap->getMaxLocalIndex(); i++) {
01630       LocalOrdinal B_LID = Bview.origMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01631       if(B_LID != LO_INVALID) targetMapToOrigRow[i] = B_LID;
01632       else {
01633         LocalOrdinal I_LID = Bview.importMatrix->getRowMap()->getLocalElement(Aview.colMap->getGlobalElement(i));
01634         targetMapToImportRow[i] = I_LID;
01635       }
01636     }
01637   }
01638 
01639   const Scalar SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
01640 
01641   // For each row of A/C
01642   for(size_t i=0; i<m; i++){
01643     Crowptr[i]=CSR_ip;
01644     Scalar Dval = Dvals[i];
01645 
01646     // Entries of B
01647     for(size_t k=Browptr[i]; k<Browptr[i+1]; k++){
01648       Scalar Bval = Bvals[k];
01649       if(Bval==SC_ZERO) continue;
01650       LocalOrdinal Ck=Bcol2Ccol[Bcolind[k]];
01651 
01652       // Assume no repeated entries in B
01653       c_status[Ck]    = CSR_ip;
01654       Ccolind[CSR_ip] = Ck;
01655       Cvals[CSR_ip]   = Bvals[k];
01656       CSR_ip++;
01657     }
01658 
01659 
01660     // Entries of -omega * Dinv * A * B
01661     for(size_t k=Arowptr[i]; k<Arowptr[i+1]; k++){
01662       LocalOrdinal Ak      = Acolind[k];
01663       Scalar       Aval    = Avals[k];
01664       if(Aval==SC_ZERO) continue;
01665 
01666       if(targetMapToOrigRow[Ak] != LO_INVALID){
01667         // Local matrix
01668         size_t Bk = Teuchos::as<size_t>(targetMapToOrigRow[Ak]);
01669 
01670         for(size_t j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
01671           LocalOrdinal Cj=Bcol2Ccol[Bcolind[j]];
01672 
01673           if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
01674             // New entry
01675             c_status[Cj]    = CSR_ip;
01676             Ccolind[CSR_ip] = Cj;
01677             Cvals[CSR_ip]   = - omega * Dval* Aval * Bvals[j];
01678             CSR_ip++;
01679           }
01680           else
01681             Cvals[c_status[Cj]] -= omega * Dval* Aval * Bvals[j];
01682         }
01683       }
01684       else{
01685         // Remote matrix
01686         size_t Ik = Teuchos::as<size_t>(targetMapToImportRow[Ak]);
01687         for(size_t j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
01688           LocalOrdinal Cj=Icol2Ccol[Icolind[j]];
01689 
01690           if(c_status[Cj]==INVALID || c_status[Cj]<OLD_ip){
01691             // New entry
01692             c_status[Cj]    = CSR_ip;
01693             Ccolind[CSR_ip] = Cj;
01694             Cvals[CSR_ip]   = - omega * Dval* Aval * Ivals[j];
01695             CSR_ip++;
01696           }
01697           else
01698             Cvals[c_status[Cj]] -= omega * Dval* Aval * Ivals[j];
01699         }
01700       }
01701     }
01702 
01703     // Resize for next pass if needed
01704     if(CSR_ip + n > CSR_alloc){
01705       CSR_alloc*=2;
01706       Ccolind_RCP.resize(CSR_alloc); Ccolind = Ccolind_RCP();
01707       Cvals_RCP.resize(CSR_alloc);   Cvals   = Cvals_RCP();
01708     }
01709     OLD_ip=CSR_ip;
01710   }
01711 
01712   Crowptr[m]=CSR_ip;
01713 
01714   // Downward resize
01715   Cvals_RCP.resize(CSR_ip);
01716   Ccolind_RCP.resize(CSR_ip);
01717 
01718 
01719 #ifdef HAVE_TPETRA_MMM_TIMINGS
01720   MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix Final Sort"))));
01721 #endif
01722 
01723   // Replace the column map
01724   C.replaceColMap(Ccolmap);
01725 
01726   // Final sort & set of CRS arrays
01727   Import_Util::sortCrsEntries(Crowptr_RCP(),Ccolind_RCP(),Cvals_RCP());
01728   C.setAllValues(Crowptr_RCP,Ccolind_RCP,Cvals_RCP);
01729 
01730 #ifdef HAVE_TPETRA_MMM_TIMINGS
01731   MM = rcp (new TimeMonitor (* (TimeMonitor::getNewTimer("TpetraExt: Jacobi Newmatrix ESFC"))));
01732 #endif
01733 
01734   // Final FillComplete
01735   C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(),Aview.origMatrix->getRangeMap(),Cimport);
01736 }
01737 
01738 
01739 template<class Scalar,
01740          class LocalOrdinal,
01741          class GlobalOrdinal,
01742          class Node,
01743          class SpMatOps>
01744 void import_and_extract_views(
01745   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& M,
01746   RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
01747   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Mview,
01748   RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > prototypeImporter,
01749   bool userAssertsThereAreNoRemotes)
01750 {
01751 #ifdef HAVE_TPETRA_MMM_TIMINGS
01752   using Teuchos::TimeMonitor;
01753   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Alloc")));
01754 #endif
01755 
01756   //Convience typedef
01757   typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t;
01758   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> CrsMatrix_t;
01759   // The goal of this method is to populate the 'Mview' struct with views of the
01760   // rows of M, including all rows that correspond to elements in 'targetMap'.
01761   //
01762   // If targetMap includes local elements that correspond to remotely-owned rows
01763   // of M, then those remotely-owned rows will be imported into
01764   // 'Mview.importMatrix', and views of them will be included in 'Mview'.
01765   Mview.deleteContents();
01766 
01767   RCP<const Map_t> Mrowmap = M.getRowMap();
01768   RCP<const Map_t> MremoteRowMap;
01769   const int numProcs = Mrowmap->getComm()->getSize();
01770 
01771   ArrayView<const GlobalOrdinal> Mrows = targetMap->getNodeElementList();
01772 
01773   size_t numRemote = 0;
01774   size_t numRows   = targetMap->getNodeNumElements();
01775   Mview.origMatrix = Teuchos::rcp(&M,false);
01776   Mview.origRowMap = M.getRowMap();
01777   Mview.rowMap = targetMap;
01778   Mview.colMap = M.getColMap();
01779   Mview.domainMap = M.getDomainMap();
01780   Mview.importColMap = null;
01781 
01782   // Short circuit if the user swears there are no remotes
01783   if(userAssertsThereAreNoRemotes) return;
01784 
01785 #ifdef HAVE_TPETRA_MMM_TIMINGS
01786   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X RemoteMap")));
01787 #endif
01788 
01789   // mark each row in targetMap as local or remote, and go ahead and get a view for the local rows
01790   int mode = 0;
01791   if(!prototypeImporter.is_null() && prototypeImporter->getSourceMap()->isSameAs(*Mrowmap) && prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
01792     // We have a valid prototype importer --- ask it for the remotes
01793     numRemote = prototypeImporter->getNumRemoteIDs();
01794     Array<GlobalOrdinal> MremoteRows(numRemote);
01795     ArrayView<const LocalOrdinal> RemoteLIDs = prototypeImporter->getRemoteLIDs();
01796     for(size_t i=0; i<numRemote; i++) {
01797       MremoteRows[i] = targetMap->getGlobalElement(RemoteLIDs[i]);
01798     }
01799 
01800     MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
01801     mode=1;
01802   }
01803   else if(prototypeImporter.is_null()) {
01804     // No prototype importer --- count the remotes the hard way
01805     Array<GlobalOrdinal> MremoteRows(numRows);
01806     for(size_t i=0; i < numRows; ++i) {
01807       const LocalOrdinal mlid = Mrowmap->getLocalElement(Mrows[i]);
01808       
01809       if (mlid == OrdinalTraits<LocalOrdinal>::invalid()) {
01810   MremoteRows[numRemote]=Mrows[i];
01811   ++numRemote;
01812       }
01813     }
01814     MremoteRows.resize(numRemote);    
01815     MremoteRowMap=rcp(new Map_t(OrdinalTraits<global_size_t>::invalid(), MremoteRows(), Mrowmap->getIndexBase(), Mrowmap->getComm(), Mrowmap->getNode()));
01816     mode=2;
01817   }
01818   else {
01819     // prototypeImporter is bad.  But if we're in serial that's OK.
01820     mode=3;
01821   }
01822 
01823   if (numProcs < 2) {
01824     TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
01825       "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows." <<std::endl);
01826     //If only one processor we don't need to import any remote rows, so return.
01827     return;
01828   }
01829 
01830   //
01831   // Now we will import the needed remote rows of M, if the global maximum
01832   // value of numRemote is greater than 0.
01833   //
01834 #ifdef HAVE_TPETRA_MMM_TIMINGS
01835   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Collective-0")));
01836 #endif
01837 
01838   global_size_t globalMaxNumRemote = 0;
01839   Teuchos::reduceAll(*(Mrowmap->getComm()) , Teuchos::REDUCE_MAX, (global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
01840 
01841 
01842   if (globalMaxNumRemote > 0) {
01843 #ifdef HAVE_TPETRA_MMM_TIMINGS
01844     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-2")));
01845 #endif
01846     // Create an importer with target-map MremoteRowMap and source-map Mrowmap.
01847     RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> > importer;
01848 
01849     if(mode==1) 
01850       importer = prototypeImporter->createRemoteOnlyImport(MremoteRowMap);
01851     else if(mode==2)
01852       importer=rcp(new Import<LocalOrdinal, GlobalOrdinal, Node>(Mrowmap, MremoteRowMap));
01853     else
01854       throw std::runtime_error("prototypeImporter->SourceMap() does not match M.getRowMap()!");
01855     
01856 #ifdef HAVE_TPETRA_MMM_TIMINGS
01857     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-3")));
01858 #endif
01859 
01860     // Now create a new matrix into which we can import the remote rows of M that we need.
01861     Mview.importMatrix = Tpetra::importAndFillCompleteCrsMatrix<CrsMatrix_t>(Teuchos::rcp(&M,false),*importer,M.getDomainMap(),M.getRangeMap(),Teuchos::null);
01862 
01863 #ifdef COMPUTE_MMM_STATISTICS
01864     printMultiplicationStatistics(importer,std::string("I&X MMM"));
01865 #endif
01866 
01867 
01868 #ifdef HAVE_TPETRA_MMM_TIMINGS
01869     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("TpetraExt: MMM I&X Import-4")));
01870 #endif
01871 
01872     // Save the column map of the imported matrix, so that we can convert indices back to global for arithmetic later
01873     Mview.importColMap = Mview.importMatrix->getColMap();
01874   }
01875 }
01876 
01877 
01878 
01879 } //End namepsace MMdetails
01880 
01881 } //End namespace Tpetra
01882 //
01883 // Explicit instantiation macro
01884 //
01885 // Must be expanded from within the Tpetra namespace!
01886 //
01887 
01888 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
01889   \
01890   template \
01891   void MatrixMatrix::Multiply( \
01892     const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
01893     bool transposeA, \
01894     const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
01895     bool transposeB, \
01896     CrsMatrix< SCALAR , LO , GO , NODE >& C, \
01897     bool call_FillComplete_on_result); \
01898 \
01899 template \
01900   void MatrixMatrix::Jacobi( \
01901     SCALAR omega, \
01902     const Vector< SCALAR, LO, GO, NODE > & Dinv, \
01903     const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
01904     const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
01905     CrsMatrix< SCALAR , LO , GO , NODE >& C, \
01906     bool call_FillComplete_on_result); \
01907 \
01908   template \
01909   void MatrixMatrix::Add( \
01910     const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
01911     bool transposeA, \
01912     SCALAR scalarA, \
01913     const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
01914     bool transposeB, \
01915     SCALAR scalarB, \
01916     RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
01917   \
01918   template \
01919   void MatrixMatrix::Add( \
01920     const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
01921     bool transposeA, \
01922     SCALAR scalarA, \
01923     CrsMatrix<SCALAR, LO, GO, NODE>& B, \
01924     SCALAR scalarB ); \
01925   \
01926   template \
01927   Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \
01928   MatrixMatrix::add (const SCALAR & alpha, \
01929                      const bool transposeA, \
01930                      const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
01931                      const SCALAR & beta, \
01932                      const bool transposeB, \
01933                      const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
01934                      const Teuchos::RCP<const Map< LO , GO , NODE > >& domainMap, \
01935                      const Teuchos::RCP<const Map< LO , GO , NODE > >& rangeMap, \
01936                      const Teuchos::RCP<Teuchos::ParameterList>& params); \
01937 \
01938 
01939 
01940 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines