Tpetra Matrix/Vector Services Version of the Day
TpetraExt_MatrixMatrix_def.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP
00030 #define TPETRA_MATRIXMATRIX_DEF_HPP
00031 
00032 #include "TpetraExt_MatrixMatrix_decl.hpp"
00033 #include "Teuchos_VerboseObject.hpp"
00034 #include "Teuchos_Array.hpp"
00035 #include "Tpetra_Util.hpp"
00036 #include "Tpetra_ConfigDefs.hpp"
00037 #include "Tpetra_CrsMatrix.hpp"
00038 #include "TpetraExt_MMHelpers_def.hpp"
00039 #include "Tpetra_RowMatrixTransposer.hpp"
00040 #include "Tpetra_ConfigDefs.hpp"
00041 #include "Tpetra_Map.hpp"
00042 #include <algorithm>
00043 #include "Teuchos_FancyOStream.hpp"
00044 
00045 
00051 namespace Tpetra {
00052 
00053 
00054 namespace MatrixMatrix{
00055 
00056 template <class Scalar, 
00057            class LocalOrdinal,
00058            class GlobalOrdinal,
00059            class Node,
00060            class SpMatOps >
00061 void Multiply(
00062   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00063   bool transposeA,
00064   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00065   bool transposeB,
00066   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& C,
00067   bool call_FillComplete_on_result)
00068 {
00069   typedef CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> Matrix_t;
00070   //
00071   //This method forms the matrix-matrix product C = op(A) * op(B), where
00072   //op(A) == A   if transposeA is false,
00073   //op(A) == A^T if transposeA is true,
00074   //and similarly for op(B).
00075   //
00076 
00077   //A and B should already be Filled.
00078   //(Should we go ahead and call FillComplete() on them if necessary?
00079   // or error out? For now, we choose to error out.)
00080   TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00081     "Uh oh. Looks like there's a bit of a problem here. No worries though. We'll help you figure it out. You're "
00082     "a fantastic programer and this just a minor bump in the road! Maybe the information below can help you out a bit."
00083     "\n\n MatrixMatrix::Multiply(): Matrix A is not fill complete.");
00084   TEST_FOR_EXCEPTION(!B.isFillComplete(), std::runtime_error,
00085     "Uh oh. Looks like there's a bit of a problem here. No worries though. We'll help you figure it out. You're "
00086     "a fantastic programer and this just a minor bump in the road! Maybe the information below can help you out a bit."
00087     "\n\n MatrixMatrix::Multiply(): Matrix B is not fill complete.");
00088 
00089   //Convience typedefs
00090   typedef CrsMatrixStruct<
00091     Scalar, 
00092     LocalOrdinal,
00093     GlobalOrdinal,
00094     Node,
00095     SpMatOps> CrsMatrixStruct_t;
00096   typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t;
00097 
00098   RCP<const Matrix_t > Aprime = null;
00099   RCP<const Matrix_t > Bprime = null;
00100   if(transposeA){
00101     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>  at(A);
00102     Aprime = at.createTranspose();
00103   }
00104   else{
00105     Aprime = rcpFromRef(A);
00106   }
00107   if(transposeB){
00108     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>  bt(B);
00109     Bprime=bt.createTranspose();
00110   }
00111   else{
00112     Bprime = rcpFromRef(B);
00113   }
00114     
00115 
00116   //now check size compatibility
00117   global_size_t numACols = A.getDomainMap()->getGlobalNumElements();
00118   global_size_t numBCols = B.getDomainMap()->getGlobalNumElements();
00119   global_size_t Aouter = transposeA ? numACols : A.getGlobalNumRows();
00120   global_size_t Bouter = transposeB ? B.getGlobalNumRows() : numBCols;
00121   global_size_t Ainner = transposeA ? A.getGlobalNumRows() : numACols;
00122   global_size_t Binner = transposeB ? numBCols : B.getGlobalNumRows();
00123   TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00124     "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
00125     "must match for matrix-matrix product. op(A) is "
00126     <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl);
00127 
00128   //The result matrix C must at least have a row-map that reflects the
00129   //correct row-size. Don't check the number of columns because rectangular
00130   //matrices which were constructed with only one map can still end up
00131   //having the correct capacity and dimensions when filled.
00132   TEST_FOR_EXCEPTION(Aouter > C.getGlobalNumRows(), std::runtime_error,
00133     "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
00134     "match dimensions of op(A) * op(B). C has "<<C.getGlobalNumRows()
00135      << " rows, should have at least "<<Aouter << std::endl);
00136 
00137   //It doesn't matter whether C is already Filled or not. If it is already
00138   //Filled, it must have space allocated for the positions that will be
00139   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
00140   //we'll error out later when trying to store result values.
00141   
00142   // CGB: However, matrix must be in active-fill
00143   TEST_FOR_EXCEPT( C.isFillActive() == false );
00144 
00145   //We're going to need to import remotely-owned sections of A and/or B
00146   //if more than 1 processor is performing this run, depending on the scenario.
00147   int numProcs = A.getComm()->getSize();
00148 
00149   //Declare a couple of structs that will be used to hold views of the data
00150   //of A and B, to be used for fast access during the matrix-multiplication.
00151   CrsMatrixStruct_t Aview;
00152   CrsMatrixStruct_t Bview;
00153 
00154   RCP<const Map_t > targetMap_A = Aprime->getRowMap();
00155   RCP<const Map_t > targetMap_B = Bprime->getRowMap();
00156 
00157   //Now import any needed remote rows and populate the Aview struct.
00158   MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview);
00159  
00160 
00161   //We will also need local access to all rows of B that correspond to the
00162   //column-map of op(A).
00163   if (numProcs > 1) {
00164     targetMap_B = Aprime->getColMap(); //colmap_op_A;
00165   }
00166 
00167   //Now import any needed remote rows and populate the Bview struct.
00168   MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview);
00169 
00170 
00171   //If the result matrix C is not already FillComplete'd, we will do a
00172   //preprocessing step to create the nonzero structure,
00173   if (!C.isFillComplete()) {
00174     CrsWrapper_GraphBuilder<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsgraphbuilder(C.getRowMap());
00175 
00176     //pass the graph-builder object to the multiplication kernel to fill in all
00177     //the nonzero positions that will be used in the result matrix.
00178     MMdetails::mult_A_B(Aview, Bview, crsgraphbuilder, true);
00179 
00180     //now insert all of the nonzero positions into the result matrix.
00181     insert_matrix_locations(crsgraphbuilder, C);
00182 
00183   }
00184 
00185   //Now call the appropriate method to perform the actual multiplication.
00186 
00187   CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
00188 
00189   MMdetails::mult_A_B(Aview, Bview, crsmat);
00190 
00191   if (call_FillComplete_on_result) {
00192     //We'll call FillComplete on the C matrix before we exit, and give
00193     //it a domain-map and a range-map.
00194     //The domain-map will be the domain-map of B, unless
00195     //op(B)==transpose(B), in which case the range-map of B will be used.
00196     //The range-map will be the range-map of A, unless
00197     //op(A)==transpose(A), in which case the domain-map of A will be used.
00198     if (!C.isFillComplete()) {
00199       C.fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
00200     }
00201   }
00202 
00203 }
00204 
00205 template <class Scalar, 
00206           class LocalOrdinal,
00207           class GlobalOrdinal,
00208           class Node,
00209           class SpMatOps >
00210 void Add(
00211   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00212   bool transposeA,
00213   Scalar scalarA,
00214   CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00215   Scalar scalarB )
00216 {
00217   TEST_FOR_EXCEPTION(!A.isFillComplete(), std::runtime_error,
00218     "MatrixMatrix::Add ERROR, input matrix A.isFillComplete() is false; it is required to be true. (Result matrix B is not required to be isFillComplete()).");
00219   TEST_FOR_EXCEPTION(B.isFillComplete() , std::runtime_error,
00220     "MatrixMatrix::Add ERROR, input matrix B must not be fill complete!");
00221   TEST_FOR_EXCEPTION(B.getProfileType()!=DynamicProfile, std::runtime_error,
00222     "MatrixMatrix::Add ERROR, input matrix B must have a dynamic profile!");
00223   //Convience typedef
00224   typedef CrsMatrix<
00225     Scalar, 
00226     LocalOrdinal,
00227     GlobalOrdinal,
00228     Node,
00229     SpMatOps> CrsMatrix_t;
00230   RCP<const CrsMatrix_t> Aprime = null;
00231   if( transposeA ){
00232     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> theTransposer(A);
00233     Aprime = theTransposer.createTranspose(DoOptimizeStorage); 
00234   }
00235   else{
00236     Aprime = rcpFromRef(A);
00237   }
00238   size_t a_numEntries;
00239   Array<GlobalOrdinal> a_inds(A.getNodeMaxNumRowEntries());
00240   Array<Scalar> a_vals(A.getNodeMaxNumRowEntries());
00241   GlobalOrdinal row;
00242 
00243   if(scalarB != ScalarTraits<Scalar>::one()){
00244     B.scale(scalarB);
00245   }
00246 
00247   bool bFilled = B.isFillComplete();
00248   size_t numMyRows = B.getNodeNumRows();
00249   if(scalarA != ScalarTraits<Scalar>::zero()){
00250     for(LocalOrdinal i = 0; (size_t)i < numMyRows; ++i){
00251       row = B.getRowMap()->getGlobalElement(i);
00252       Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
00253       if(scalarA != ScalarTraits<Scalar>::one()){
00254         for(size_t j =0; j<a_numEntries; ++j){
00255           a_vals[j] *= scalarA;
00256         }
00257       }
00258       if(bFilled){
00259         B.sumIntoGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
00260       }
00261       else{
00262         B.insertGlobalValues(row, a_inds(0,a_numEntries), a_vals(0,a_numEntries));
00263       }
00264 
00265     }
00266   }
00267 }
00268 
00269 template <class Scalar, 
00270           class LocalOrdinal,
00271           class GlobalOrdinal,
00272           class Node,
00273           class SpMatOps>
00274 void Add(
00275   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& A,
00276   bool transposeA,
00277   Scalar scalarA,
00278   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& B,
00279   bool transposeB,
00280   Scalar scalarB,
00281   RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> > C)
00282 {
00283   //
00284   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
00285 
00286   //Convience typedef
00287   typedef CrsMatrix<
00288     Scalar, 
00289     LocalOrdinal,
00290     GlobalOrdinal,
00291     Node,
00292     SpMatOps> CrsMatrix_t;
00293 
00294   //A and B should already be Filled. C should be an empty pointer.
00295 
00296 
00297   TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), std::runtime_error,
00298     "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
00299     "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl);
00300 
00301 
00302   RCP<const CrsMatrix_t> Aprime = null;
00303   RCP<const CrsMatrix_t> Bprime = null;
00304 
00305 
00306   //explicit tranpose A formed as necessary
00307   if( transposeA ) {
00308     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> theTransposer(A);
00309     Aprime = theTransposer.createTranspose(DoOptimizeStorage);
00310   }
00311   else{
00312     Aprime = rcpFromRef(A);
00313   }
00314 
00315   //explicit tranpose B formed as necessary
00316   if( transposeB ) {
00317     RowMatrixTransposer<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps> theTransposer(B);
00318     Bprime = theTransposer.createTranspose(DoOptimizeStorage);
00319   }
00320   else{
00321     Bprime = rcpFromRef(B);
00322   }
00323 
00324   // allocate or zero the new matrix
00325   if(C != null)
00326      C->setAllToScalar(ScalarTraits<Scalar>::zero());
00327   else
00328     C = rcp(new CrsMatrix_t(Aprime->getRowMap(), null));
00329 
00330   Array<RCP<const CrsMatrix_t> > Mat = 
00331     Teuchos::tuple<RCP<const CrsMatrix_t> >(Aprime, Bprime);
00332   Array<Scalar> scalar = Teuchos::tuple<Scalar>(scalarA, scalarB);
00333 
00334   // do a loop over each matrix to add: A reordering might be more efficient
00335   for(int k=0;k<2;++k) {
00336     size_t NumEntries;
00337     Array<GlobalOrdinal> Indices;
00338     Array<Scalar> Values;
00339    
00340      size_t NumMyRows = Mat[k]->getNodeNumRows();
00341      GlobalOrdinal Row;
00342    
00343      //Loop over rows and sum into C
00344      for( size_t i = OrdinalTraits<size_t>::zero(); i < NumMyRows; ++i ) {
00345         Row = Mat[k]->getRowMap()->getGlobalElement(i);
00346         NumEntries = Mat[k]->getNumEntriesInGlobalRow(Row);
00347         if(NumEntries == OrdinalTraits<global_size_t>::zero()){
00348           continue;
00349         }
00350 
00351         Indices.resize(NumEntries);
00352         Values.resize(NumEntries);
00353         Mat[k]->getGlobalRowCopy(Row, Indices(), Values(), NumEntries);
00354    
00355         if( scalar[k] != ScalarTraits<Scalar>::one() )
00356            for( size_t j = OrdinalTraits<size_t>::zero(); j < NumEntries; ++j ) Values[j] *= scalar[k];
00357    
00358         if(C->isFillComplete()) { // Sum in values
00359            C->sumIntoGlobalValues( Row, Indices, Values);
00360         } else { // just add it to the unfilled CRS Matrix
00361            C->insertGlobalValues( Row, Indices, Values);
00362         }
00363      }
00364   }
00365 }
00366 
00367 } //End namespace MatrixMatrix
00368 
00369 namespace MMdetails{
00370 
00371 
00372 //kernel method for computing the local portion of C = A*B
00373 template<class Scalar, 
00374          class LocalOrdinal, 
00375          class GlobalOrdinal, 
00376          class Node, 
00377          class SpMatOps>
00378 void mult_A_B(
00379   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Aview, 
00380   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Bview, 
00381   CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
00382   bool onlyCalculateStructure)
00383 {
00384   LocalOrdinal C_firstCol = Bview.colMap->getMinLocalIndex();
00385   LocalOrdinal C_lastCol = Bview.colMap->getMaxLocalIndex();
00386 
00387   LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
00388   LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
00389 
00390   ArrayView<const GlobalOrdinal> bcols =Bview.colMap->getNodeElementList();
00391   ArrayView<const GlobalOrdinal> bcols_import = null;
00392   if (Bview.importColMap != null) {
00393     C_firstCol_import = Bview.importColMap->getMinLocalIndex();
00394     C_lastCol_import = Bview.importColMap->getMaxLocalIndex();
00395 
00396     bcols_import = Bview.importColMap->getNodeElementList();
00397   }
00398 
00399   size_t C_numCols = C_lastCol - C_firstCol + OrdinalTraits<LocalOrdinal>::one();
00400   size_t C_numCols_import = C_lastCol_import - C_firstCol_import + OrdinalTraits<LocalOrdinal>::one();
00401 
00402   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00403 
00404   Array<Scalar> dwork = onlyCalculateStructure ? Array<Scalar>() : Array<Scalar>(C_numCols);
00405   Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
00406 
00407   Array<Scalar> C_row_i = dwork;
00408   Array<GlobalOrdinal> C_cols = iwork;
00409 
00410   size_t C_row_i_length, i, j, k;
00411 
00412   //To form C = A*B we're going to execute this expression:
00413   //
00414   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00415   //
00416   //Our goal, of course, is to navigate the data in A and B once, without
00417   //performing searches for column-indices, etc.
00418 
00419   bool C_filled = C.isFillComplete();
00420 
00421   //loop over the rows of A.
00422   for(i=0; i<Aview.numRows; ++i) {
00423 
00424     //only navigate the local portion of Aview... (It's probable that we
00425     //imported more of A than we need for A*B, because other cases like A^T*B 
00426     //need the extra rows.)
00427     if (Aview.remote[i]) {
00428       continue;
00429     }
00430 
00431     ArrayView<const LocalOrdinal> Aindices_i = Aview.indices[i];
00432     ArrayView<const Scalar> Aval_i  = onlyCalculateStructure ? null : Aview.values[i];
00433 
00434     GlobalOrdinal global_row = Aview.rowMap->getGlobalElement(i);
00435 
00436 
00437     //loop across the i-th row of A and for each corresponding row
00438     //in B, loop across colums and accumulate product
00439     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00440     //as we stride across B(k,:) we're calculating updates for row i of the
00441     //result matrix C.
00442 
00443     for(k=OrdinalTraits<size_t>::zero(); k<Aview.numEntriesPerRow[i]; ++k) {
00444       LocalOrdinal Ak = Bview.rowMap->getLocalElement(Aview.colMap->getGlobalElement(Aindices_i[k]));
00445       Scalar Aval = onlyCalculateStructure ? Teuchos::as<Scalar>(0) : Aval_i[k];
00446 
00447       ArrayView<const LocalOrdinal> Bcol_inds = Bview.indices[Ak];
00448       ArrayView<const Scalar> Bvals_k = onlyCalculateStructure ? null : Bview.values[Ak];
00449 
00450       C_row_i_length = OrdinalTraits<size_t>::zero();
00451 
00452       if (Bview.remote[Ak]) {
00453         for(j=OrdinalTraits<size_t>::zero(); j<Bview.numEntriesPerRow[Ak]; ++j) {
00454           if(!onlyCalculateStructure){
00455             C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00456           }
00457           C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]];
00458         }
00459       }
00460       else {
00461         for(j=OrdinalTraits<size_t>::zero(); j<Bview.numEntriesPerRow[Ak]; ++j) {
00462           if(!onlyCalculateStructure){
00463             C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00464           }
00465           C_cols[C_row_i_length++] = bcols[Bcol_inds[j]];
00466         }
00467       }
00468 
00469       //
00470       //Now put the C_row_i values into C.
00471       //
00472 
00473       C_filled ?
00474         C.sumIntoGlobalValues(
00475           global_row, 
00476           C_cols.view(OrdinalTraits<size_t>::zero(), C_row_i_length), 
00477           onlyCalculateStructure ? null : C_row_i.view(OrdinalTraits<size_t>::zero(), C_row_i_length))
00478         :
00479         C.insertGlobalValues(
00480           global_row, 
00481           C_cols.view(OrdinalTraits<size_t>::zero(), C_row_i_length), 
00482           onlyCalculateStructure ? null : C_row_i.view(OrdinalTraits<size_t>::zero(), C_row_i_length));
00483 
00484     }
00485   }
00486 
00487 }
00488 
00489 template<class Scalar,
00490          class LocalOrdinal, 
00491          class GlobalOrdinal, 
00492          class Node,
00493          class SpMatOps>
00494 void setMaxNumEntriesPerRow(
00495   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Mview)
00496 {
00497   typedef typename Array<ArrayView<const LocalOrdinal> >::size_type  local_length_size;
00498   Mview.maxNumRowEntries = OrdinalTraits<local_length_size>::zero();
00499   if(Mview.indices.size() > OrdinalTraits<local_length_size>::zero() ){
00500     Mview.maxNumRowEntries = Mview.indices[0].size();
00501     for(local_length_size i = 1; i<Mview.indices.size(); ++i){
00502       if(Mview.indices[i].size() > Mview.maxNumRowEntries){
00503         Mview.maxNumRowEntries = Mview.indices[i].size();
00504       }
00505     }
00506   }
00507 }
00508 
00509 template<class Scalar,
00510          class LocalOrdinal, 
00511          class GlobalOrdinal, 
00512          class Node,
00513          class SpMatOps>
00514 void import_and_extract_views(
00515   const CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& M,
00516   RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> > targetMap,
00517   CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Node, SpMatOps>& Mview)
00518 {
00519   //Convience typedef
00520   typedef Map<LocalOrdinal, GlobalOrdinal, Node> Map_t;
00521   // The goal of this method is to populate the 'Mview' struct with views of the
00522   // rows of M, including all rows that correspond to elements in 'targetMap'.
00523   // 
00524   // If targetMap includes local elements that correspond to remotely-owned rows
00525   // of M, then those remotely-owned rows will be imported into
00526   // 'Mview.importMatrix', and views of them will be included in 'Mview'.
00527   Mview.deleteContents();
00528 
00529   RCP<const Map_t> Mrowmap = M.getRowMap();
00530 
00531   const int numProcs = Mrowmap->getComm()->getSize();
00532 
00533   ArrayView<const GlobalOrdinal> Mrows = targetMap->getNodeElementList();
00534 
00535   Mview.numRemote = 0;
00536   Mview.numRows = targetMap->getNodeNumElements();
00537   Mview.numEntriesPerRow.resize(Mview.numRows);
00538   Mview.indices.resize(         Mview.numRows);
00539   Mview.values.resize(          Mview.numRows);
00540   Mview.remote.resize(          Mview.numRows);
00541 
00542 
00543   Mview.origRowMap = M.getRowMap();
00544   Mview.rowMap = targetMap;
00545   Mview.colMap = M.getColMap();
00546   Mview.domainMap = M.getDomainMap();
00547   Mview.importColMap = null;
00548 
00549 
00550   // mark each row in targetMap as local or remote, and go ahead and get a view for the local rows
00551 
00552   for(size_t i=0; i < Mview.numRows; ++i) 
00553   {
00554     const LocalOrdinal mlid = Mrowmap->getLocalElement(Mrows[i]);
00555 
00556     if (mlid == OrdinalTraits<LocalOrdinal>::invalid()) {
00557       Mview.remote[i] = true;
00558       ++Mview.numRemote;
00559     }
00560     else {
00561       Mview.remote[i] = false;
00562       M.getLocalRowView(mlid, Mview.indices[i], Mview.values[i]);
00563       Mview.numEntriesPerRow[i] = Mview.indices[i].size();
00564     }
00565   }
00566 
00567   if (numProcs < 2) {
00568     TEST_FOR_EXCEPTION(Mview.numRemote > 0, std::runtime_error,
00569       "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows." <<std::endl);
00570     setMaxNumEntriesPerRow(Mview);
00571     //If only one processor we don't need to import any remote rows, so return.
00572     return;
00573   }
00574 
00575   //
00576   // Now we will import the needed remote rows of M, if the global maximum
00577   // value of numRemote is greater than 0.
00578   //
00579 
00580   global_size_t globalMaxNumRemote = 0;
00581   Teuchos::reduceAll(*(Mrowmap->getComm()) , Teuchos::REDUCE_MAX, Mview.numRemote, Teuchos::outArg(globalMaxNumRemote) );
00582 
00583   if (globalMaxNumRemote > 0) {
00584     // Create a map that describes the remote rows of M that we need.
00585 
00586     Array<GlobalOrdinal> MremoteRows(Mview.numRemote);
00587 
00588 
00589     global_size_t offset = 0;
00590     for(size_t i=0; i < Mview.numRows; ++i) {
00591       if (Mview.remote[i]) {
00592         MremoteRows[offset++] = Mrows[i];
00593       }
00594     }
00595 
00596     RCP<const Map_t> MremoteRowMap = rcp(new Map_t(
00597       OrdinalTraits<GlobalOrdinal>::invalid(), 
00598       MremoteRows(), 
00599       Mrowmap->getIndexBase(), 
00600       Mrowmap->getComm(), 
00601       Mrowmap->getNode()));
00602 
00603     // Create an importer with target-map MremoteRowMap and source-map Mrowmap.
00604     Import<LocalOrdinal, GlobalOrdinal, Node> importer(Mrowmap, MremoteRowMap);
00605 
00606     // Now create a new matrix into which we can import the remote rows of M that we need.
00607     Mview.importMatrix = rcp(new CrsMatrix<Scalar,LocalOrdinal, GlobalOrdinal, Node, SpMatOps>( MremoteRowMap, 1 ));
00608     Mview.importMatrix->doImport(M, importer, INSERT);
00609     Mview.importMatrix->fillComplete(M.getDomainMap(), M.getRangeMap());
00610 
00611     // Save the column map of the imported matrix, so that we can convert indices back to global for arithmetic later
00612     Mview.importColMap = Mview.importMatrix->getColMap();
00613 
00614     // Finally, use the freshly imported data to fill in the gaps in our views of rows of M.
00615     for(size_t i=0; i < Mview.numRows; ++i) 
00616     {
00617       if (Mview.remote[i]) {
00618         const LocalOrdinal importLID = MremoteRowMap->getLocalElement(Mrows[i]);
00619         Mview.importMatrix->getLocalRowView(importLID,
00620                                              Mview.indices[i],
00621                                              Mview.values[i]);
00622         Mview.numEntriesPerRow[i] = Mview.indices[i].size();
00623       }
00624     }
00625   }
00626   setMaxNumEntriesPerRow(Mview);
00627 }
00628 
00629 } //End namepsace MMdetails
00630 
00631 } //End namespace Tpetra
00632 //
00633 // Explicit instantiation macro
00634 //
00635 // Must be expanded from within the Tpetra namespace!
00636 //
00637 
00638 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \
00639   \
00640   template \
00641   void MatrixMatrix::Multiply( \
00642     const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
00643     bool transposeA, \
00644     const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
00645     bool transposeB, \
00646     CrsMatrix< SCALAR , LO , GO , NODE >& C, \
00647     bool call_FillComplete_on_result); \
00648 \
00649   template \
00650   void MatrixMatrix::Add( \
00651     const CrsMatrix< SCALAR , LO , GO , NODE >& A, \
00652     bool transposeA, \
00653     SCALAR scalarA, \
00654     const CrsMatrix< SCALAR , LO , GO , NODE >& B, \
00655     bool transposeB, \
00656     SCALAR scalarB, \
00657     RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \
00658   \
00659   template  \
00660   void MatrixMatrix::Add( \
00661     const CrsMatrix<SCALAR, LO, GO, NODE>& A, \
00662     bool transposeA, \
00663     SCALAR scalarA, \
00664     CrsMatrix<SCALAR, LO, GO, NODE>& B, \
00665     SCALAR scalarB ); \
00666   \
00667 
00668 
00669 #endif // TPETRA_MATRIXMATRIX_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines