EpetraExt Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 //
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2011) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 
00042 #include <EpetraExt_ConfigDefs.h>
00043 #include <EpetraExt_MatrixMatrix.h>
00044 
00045 #include <EpetraExt_MMHelpers.h>
00046 
00047 #include <EpetraExt_Transpose_RowMatrix.h>
00048 
00049 #include <Epetra_Export.h>
00050 #include <Epetra_Import.h>
00051 #include <Epetra_Util.h>
00052 #include <Epetra_Map.h>
00053 #include <Epetra_Comm.h>
00054 #include <Epetra_CrsMatrix.h>
00055 #include <Epetra_Vector.h>
00056 #include <Epetra_Directory.h>
00057 #include <Epetra_HashTable.h>
00058 #include <Epetra_Distributor.h>
00059 
00060 #include <Teuchos_TimeMonitor.hpp>
00061 
00062 #ifdef HAVE_VECTOR
00063 #include <vector>
00064 #endif
00065 
00066 
00067 
00068 namespace EpetraExt {
00069 //=========================================================================
00070 //
00071 //Method for internal use... sparsedot forms a dot-product between two
00072 //sparsely-populated 'vectors'.
00073 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00074 //
00075 template<typename int_type>
00076 double sparsedot(double* u, int_type* u_ind, int u_len,
00077      double* v, int_type* v_ind, int v_len)
00078 {
00079   double result = 0.0;
00080 
00081   int v_idx = 0;
00082   int u_idx = 0;
00083 
00084   while(v_idx < v_len && u_idx < u_len) {
00085     int_type ui = u_ind[u_idx];
00086     int_type vi = v_ind[v_idx];
00087 
00088     if (ui < vi) {
00089       ++u_idx;
00090     }
00091     else if (ui > vi) {
00092       ++v_idx;
00093     }
00094     else {
00095       result += u[u_idx++]*v[v_idx++];
00096     }
00097   }
00098 
00099   return(result);
00100 }
00101 
00102 //=========================================================================
00103 //kernel method for computing the local portion of C = A*B^T
00104 template<typename int_type>
00105 int mult_A_Btrans(CrsMatrixStruct& Aview,
00106       CrsMatrixStruct& Bview,
00107       CrsWrapper& C)
00108 {
00109   int i, j, k;
00110   int returnValue = 0;
00111 
00112   int maxlen = 0;
00113   for(i=0; i<Aview.numRows; ++i) {
00114     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00115   }
00116   for(i=0; i<Bview.numRows; ++i) {
00117     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00118   }
00119 
00120   //std::cout << "Aview: " << std::endl;
00121   //dumpCrsMatrixStruct(Aview);
00122 
00123   //std::cout << "Bview: " << std::endl;
00124   //dumpCrsMatrixStruct(Bview);
00125 
00126   int numBcols = Bview.colMap->NumMyElements();
00127   int numBrows = Bview.numRows;
00128 
00129   int iworklen = maxlen*2 + numBcols;
00130   int_type* iwork = new int_type[iworklen];
00131 
00132   int_type * bcols = iwork+maxlen*2;
00133   int_type* bgids = 0;
00134   Bview.colMap->MyGlobalElementsPtr(bgids);
00135   double* bvals = new double[maxlen*2];
00136   double* avals = bvals+maxlen;
00137 
00138   int_type max_all_b = (int_type) Bview.colMap->MaxAllGID64();
00139   int_type min_all_b = (int_type) Bview.colMap->MinAllGID64();
00140 
00141   //bcols will hold the GIDs from B's column-map for fast access
00142   //during the computations below
00143   for(i=0; i<numBcols; ++i) {
00144     int blid = Bview.colMap->LID(bgids[i]);
00145     bcols[blid] = bgids[i];
00146   }
00147 
00148   //next create arrays indicating the first and last column-index in
00149   //each row of B, so that we can know when to skip certain rows below.
00150   //This will provide a large performance gain for banded matrices, and
00151   //a somewhat smaller gain for *most* other matrices.
00152   int_type* b_firstcol = new int_type[2*numBrows];
00153   int_type* b_lastcol = b_firstcol+numBrows;
00154   int_type temp;
00155   for(i=0; i<numBrows; ++i) {
00156     b_firstcol[i] = max_all_b;
00157     b_lastcol[i] = min_all_b;
00158 
00159     int Blen_i = Bview.numEntriesPerRow[i];
00160     if (Blen_i < 1) continue;
00161     int* Bindices_i = Bview.indices[i];
00162 
00163     if (Bview.remote[i]) {
00164       for(k=0; k<Blen_i; ++k) {
00165         temp = (int_type) Bview.importColMap->GID64(Bindices_i[k]);
00166         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00167         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00168       }
00169     }
00170     else {
00171       for(k=0; k<Blen_i; ++k) {
00172         temp = bcols[Bindices_i[k]];
00173         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00174         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00175       }
00176     }
00177   }
00178 
00179   Epetra_Util util;
00180 
00181   int_type* Aind = iwork;
00182   int_type* Bind = iwork+maxlen;
00183 
00184   bool C_filled = C.Filled();
00185 
00186   //To form C = A*B^T, we're going to execute this expression:
00187   //
00188   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00189   //
00190   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00191   //But it requires the use of a 'sparsedot' function (we're simply forming
00192   //dot-products with row A_i and row B_j for all i and j).
00193 
00194   //loop over the rows of A.
00195   for(i=0; i<Aview.numRows; ++i) {
00196     if (Aview.remote[i]) {
00197       continue;
00198     }
00199 
00200     int* Aindices_i = Aview.indices[i];
00201     double* Aval_i  = Aview.values[i];
00202     int A_len_i = Aview.numEntriesPerRow[i];
00203     if (A_len_i < 1) {
00204       continue;
00205     }
00206 
00207     for(k=0; k<A_len_i; ++k) {
00208       Aind[k] = (int_type) Aview.colMap->GID64(Aindices_i[k]);
00209       avals[k] = Aval_i[k];
00210     }
00211 
00212     util.Sort<int_type>(true, A_len_i, Aind, 1, &avals, 0, NULL, 0, 0);
00213 
00214     int_type mina = Aind[0];
00215     int_type maxa = Aind[A_len_i-1];
00216 
00217     if (mina > max_all_b || maxa < min_all_b) {
00218       continue;
00219     }
00220 
00221     int_type global_row = (int_type) Aview.rowMap->GID64(i);
00222 
00223     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00224     for(j=0; j<Bview.numRows; ++j) {
00225       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00226         continue;
00227       }
00228 
00229       int* Bindices_j = Bview.indices[j];
00230       int B_len_j = Bview.numEntriesPerRow[j];
00231       if (B_len_j < 1) {
00232         continue;
00233       }
00234 
00235       int_type tmp;
00236       int Blen = 0;
00237 
00238       if (Bview.remote[j]) {
00239         for(k=0; k<B_len_j; ++k) {
00240     tmp = (int_type) Bview.importColMap->GID64(Bindices_j[k]);
00241           if (tmp < mina || tmp > maxa) {
00242             continue;
00243           }
00244 
00245           bvals[Blen] = Bview.values[j][k];
00246           Bind[Blen++] = tmp;
00247   }
00248       }
00249       else {
00250         for(k=0; k<B_len_j; ++k) {
00251     tmp = bcols[Bindices_j[k]];
00252           if (tmp < mina || tmp > maxa) {
00253             continue;
00254           }
00255 
00256           bvals[Blen] = Bview.values[j][k];
00257           Bind[Blen++] = tmp;
00258   }
00259       }
00260 
00261       if (Blen < 1) {
00262         continue;
00263       }
00264 
00265       util.Sort<int_type>(true, Blen, Bind, 1, &bvals, 0, NULL, 0, 0);
00266 
00267       double C_ij = sparsedot(avals, Aind, A_len_i,
00268             bvals, Bind, Blen);
00269 
00270       if (C_ij == 0.0) {
00271   continue;
00272       }
00273       int_type global_col = (int_type) Bview.rowMap->GID64(j);
00274 
00275       int err = C_filled ?
00276         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00277         :
00278         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00279 
00280       if (err < 0) {
00281   return(err);
00282       }
00283       if (err > 0) {
00284   if (C_filled) {
00285     //C.Filled()==true, and C doesn't have all the necessary nonzero
00286           //locations, or global_row or global_col is out of range (less
00287           //than 0 or non local).
00288           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00289               << "to insert value in result matrix at position "<<global_row
00290              <<"," <<global_col<<", possibly because result matrix has a "
00291              << "column-map that doesn't include column "<<global_col
00292              <<" on this proc." <<std::endl;
00293     return(err);
00294   }
00295       }
00296     }
00297   }
00298 
00299   delete [] iwork;
00300   delete [] bvals;
00301   delete [] b_firstcol;
00302 
00303   return(returnValue);
00304 }
00305 
00306 int mult_A_Btrans(CrsMatrixStruct& Aview,
00307        CrsMatrixStruct& Bview,
00308        CrsWrapper& C)
00309 {
00310 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00311   if(Aview.rowMap->GlobalIndicesInt() &&
00312      Aview.colMap->GlobalIndicesInt() &&
00313      Aview.rowMap->GlobalIndicesInt() &&
00314      Aview.colMap->GlobalIndicesInt()) {
00315     return mult_A_Btrans<int>(Aview, Bview, C);
00316   }
00317   else
00318 #endif
00319 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00320   if(Aview.rowMap->GlobalIndicesLongLong() &&
00321      Aview.colMap->GlobalIndicesLongLong() &&
00322      Aview.rowMap->GlobalIndicesLongLong() &&
00323      Aview.colMap->GlobalIndicesLongLong()) {
00324     return mult_A_Btrans<long long>(Aview, Bview, C);
00325   }
00326   else
00327 #endif
00328     throw "EpetraExt::mult_A_Btrans: GlobalIndices type unknown";
00329 }
00330 
00331 //=========================================================================
00332 //kernel method for computing the local portion of C = A^T*B
00333 template<typename int_type>
00334 int mult_Atrans_B(CrsMatrixStruct& Aview,
00335       CrsMatrixStruct& Bview,
00336       CrsWrapper& C)
00337 {
00338   int C_firstCol = Bview.colMap->MinLID();
00339   int C_lastCol = Bview.colMap->MaxLID();
00340 
00341   int C_firstCol_import = 0;
00342   int C_lastCol_import = -1;
00343 
00344   if (Bview.importColMap != NULL) {
00345     C_firstCol_import = Bview.importColMap->MinLID();
00346     C_lastCol_import = Bview.importColMap->MaxLID();
00347   }
00348 
00349   int C_numCols = C_lastCol - C_firstCol + 1;
00350   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00351 
00352   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00353 
00354   double* C_row_i = new double[C_numCols];
00355   int_type* C_colInds = new int_type[C_numCols];
00356 
00357   int i, j, k;
00358 
00359   for(j=0; j<C_numCols; ++j) {
00360     C_row_i[j] = 0.0;
00361     C_colInds[j] = 0;
00362   }
00363 
00364   //To form C = A^T*B, compute a series of outer-product updates.
00365   //
00366   // for (ith column of A^T) { 
00367   //   C_i = outer product of A^T(:,i) and B(i,:)
00368   // Where C_i is the ith matrix update,
00369   //       A^T(:,i) is the ith column of A^T, and
00370   //       B(i,:) is the ith row of B.
00371   //
00372 
00373   //dumpCrsMatrixStruct(Aview);
00374   //dumpCrsMatrixStruct(Bview);
00375   int localProc = Bview.colMap->Comm().MyPID();
00376 
00377   int_type* Arows = 0;
00378   Aview.rowMap->MyGlobalElementsPtr(Arows);
00379 
00380   bool C_filled = C.Filled();
00381 
00382   //loop over the rows of A (which are the columns of A^T).
00383   for(i=0; i<Aview.numRows; ++i) {
00384 
00385     int* Aindices_i = Aview.indices[i];
00386     double* Aval_i  = Aview.values[i];
00387 
00388     //we'll need to get the row of B corresponding to Arows[i],
00389     //where Arows[i] is the GID of A's ith row.
00390     int Bi = Bview.rowMap->LID(Arows[i]);
00391     if (Bi<0) {
00392       std::cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00393      <<Arows[i]<<" of matrix B, but doesn't have it."<<std::endl;
00394       return(-1);
00395     }
00396 
00397     int* Bcol_inds = Bview.indices[Bi];
00398     double* Bvals_i = Bview.values[Bi];
00399 
00400     //for each column-index Aj in the i-th row of A, we'll update
00401     //global-row GID(Aj) of the result matrix C. In that row of C,
00402     //we'll update column-indices given by the column-indices in the
00403     //ith row of B that we're now holding (Bcol_inds).
00404 
00405     //First create a list of GIDs for the column-indices
00406     //that we'll be updating.
00407 
00408     int Blen = Bview.numEntriesPerRow[Bi];
00409     if (Bview.remote[Bi]) {
00410       for(j=0; j<Blen; ++j) {
00411         C_colInds[j] = (int_type) Bview.importColMap->GID64(Bcol_inds[j]);
00412       }
00413     }
00414     else {
00415       for(j=0; j<Blen; ++j) {
00416         C_colInds[j] = (int_type) Bview.colMap->GID64(Bcol_inds[j]);
00417       }
00418     }
00419 
00420     //loop across the i-th row of A (column of A^T)
00421     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00422 
00423       int Aj = Aindices_i[j];
00424       double Aval = Aval_i[j];
00425 
00426       int_type global_row;
00427       if (Aview.remote[i]) {
00428   global_row = (int_type) Aview.importColMap->GID64(Aj);
00429       }
00430       else {
00431   global_row = (int_type) Aview.colMap->GID64(Aj);
00432       }
00433 
00434       if (!C.RowMap().MyGID(global_row)) {
00435         continue;
00436       }
00437 
00438       for(k=0; k<Blen; ++k) {
00439         C_row_i[k] = Aval*Bvals_i[k];
00440       }
00441 
00442       //
00443       //Now add this row-update to C.
00444       //
00445 
00446       int err = C_filled ?
00447         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00448         :
00449         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00450 
00451       if (err < 0) {
00452         return(err);
00453       }
00454       if (err > 0) {
00455         if (C_filled) {
00456           //C is Filled, and doesn't have all the necessary nonzero locations.
00457           return(err);
00458         }
00459       }
00460     }
00461   }
00462 
00463   delete [] C_row_i;
00464   delete [] C_colInds;
00465 
00466   return(0);
00467 }
00468 
00469 int mult_Atrans_B(CrsMatrixStruct& Aview,
00470        CrsMatrixStruct& Bview,
00471        CrsWrapper& C)
00472 {
00473 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00474   if(Aview.rowMap->GlobalIndicesInt() &&
00475      Aview.colMap->GlobalIndicesInt() &&
00476      Aview.rowMap->GlobalIndicesInt() &&
00477      Aview.colMap->GlobalIndicesInt()) {
00478     return mult_Atrans_B<int>(Aview, Bview, C);
00479   }
00480   else
00481 #endif
00482 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00483   if(Aview.rowMap->GlobalIndicesLongLong() &&
00484      Aview.colMap->GlobalIndicesLongLong() &&
00485      Aview.rowMap->GlobalIndicesLongLong() &&
00486      Aview.colMap->GlobalIndicesLongLong()) {
00487     return mult_Atrans_B<long long>(Aview, Bview, C);
00488   }
00489   else
00490 #endif
00491     throw "EpetraExt::mult_Atrans_B: GlobalIndices type unknown";
00492 }
00493 
00494 //kernel method for computing the local portion of C = A^T*B^T
00495 template<typename int_type>
00496 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00497            CrsMatrixStruct& Bview,
00498            CrsWrapper& C)
00499 {
00500   int C_firstCol = Aview.rowMap->MinLID();
00501   int C_lastCol = Aview.rowMap->MaxLID();
00502 
00503   int C_firstCol_import = 0;
00504   int C_lastCol_import = -1;
00505 
00506   if (Aview.importColMap != NULL) {
00507     C_firstCol_import = Aview.importColMap->MinLID();
00508     C_lastCol_import = Aview.importColMap->MaxLID();
00509   }
00510 
00511   int C_numCols = C_lastCol - C_firstCol + 1;
00512   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00513 
00514   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00515 
00516   double* dwork = new double[C_numCols];
00517   int_type* iwork = new int_type[C_numCols];
00518 
00519   double* C_col_j = dwork;
00520   int_type* C_inds = iwork;
00521 
00522   //std::cout << "Aview: " << std::endl;
00523   //dumpCrsMatrixStruct(Aview);
00524 
00525   //std::cout << "Bview: " << std::endl;
00526   //dumpCrsMatrixStruct(Bview);
00527 
00528 
00529   int i, j, k;
00530 
00531   for(j=0; j<C_numCols; ++j) {
00532     C_col_j[j] = 0.0;
00533     C_inds[j] = -1;
00534   }
00535 
00536   int_type* A_col_inds = 0;
00537   Aview.colMap->MyGlobalElementsPtr(A_col_inds);
00538   int_type* A_col_inds_import = 0;
00539   if(Aview.importColMap)
00540     Aview.importColMap->MyGlobalElementsPtr(A_col_inds_import);
00541 
00542   const Epetra_Map* Crowmap = &(C.RowMap());
00543 
00544   //To form C = A^T*B^T, we're going to execute this expression:
00545   //
00546   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00547   //
00548   //Our goal, of course, is to navigate the data in A and B once, without
00549   //performing searches for column-indices, etc. In other words, we avoid
00550   //column-wise operations like the plague...
00551 
00552   int_type* Brows = 0;
00553   Bview.rowMap->MyGlobalElementsPtr(Brows);
00554 
00555   //loop over the rows of B
00556   for(j=0; j<Bview.numRows; ++j) {
00557     int* Bindices_j = Bview.indices[j];
00558     double* Bvals_j = Bview.values[j];
00559 
00560     int_type global_col = Brows[j];
00561 
00562     //loop across columns in the j-th row of B and for each corresponding
00563     //row in A, loop across columns and accumulate product
00564     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00565     //words, as we stride across B(j,:), we use selected rows in A to
00566     //calculate updates for column j of the result matrix C.
00567 
00568     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00569 
00570       int bk = Bindices_j[k];
00571       double Bval = Bvals_j[k];
00572 
00573       int_type global_k;
00574       if (Bview.remote[j]) {
00575   global_k = (int_type) Bview.importColMap->GID64(bk);
00576       }
00577       else {
00578   global_k = (int_type) Bview.colMap->GID64(bk);
00579       }
00580 
00581       //get the corresponding row in A
00582       int ak = Aview.rowMap->LID(global_k);
00583       if (ak<0) {
00584   continue;
00585       }
00586 
00587       int* Aindices_k = Aview.indices[ak];
00588       double* Avals_k = Aview.values[ak];
00589 
00590       int C_len = 0;
00591 
00592       if (Aview.remote[ak]) {
00593   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00594     C_col_j[C_len] = Avals_k[i]*Bval;
00595           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00596   }
00597       }
00598       else {
00599   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00600     C_col_j[C_len] = Avals_k[i]*Bval;
00601           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00602   }
00603       }
00604 
00605       //Now loop across the C_col_j values and put non-zeros into C.
00606 
00607       for(i=0; i < C_len ; ++i) {
00608   if (C_col_j[i] == 0.0) continue;
00609 
00610   int_type global_row = C_inds[i];
00611   if (!Crowmap->MyGID(global_row)) {
00612     continue;
00613   }
00614 
00615   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00616 
00617   if (err < 0) {
00618     return(err);
00619   }
00620   else {
00621           if (err > 0) {
00622       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00623       if (err < 0) {
00624               return(err);
00625             }
00626     }
00627   }
00628       }
00629 
00630     }
00631   }
00632 
00633   delete [] dwork;
00634   delete [] iwork;
00635 
00636   return(0);
00637 }
00638 
00639 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00640        CrsMatrixStruct& Bview,
00641        CrsWrapper& C)
00642 {
00643 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00644   if(Aview.rowMap->GlobalIndicesInt() &&
00645      Aview.colMap->GlobalIndicesInt() &&
00646      Aview.rowMap->GlobalIndicesInt() &&
00647      Aview.colMap->GlobalIndicesInt()) {
00648     return mult_Atrans_Btrans<int>(Aview, Bview, C);
00649   }
00650   else
00651 #endif
00652 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00653   if(Aview.rowMap->GlobalIndicesLongLong() &&
00654      Aview.colMap->GlobalIndicesLongLong() &&
00655      Aview.rowMap->GlobalIndicesLongLong() &&
00656      Aview.colMap->GlobalIndicesLongLong()) {
00657     return mult_Atrans_Btrans<long long>(Aview, Bview, C);
00658   }
00659   else
00660 #endif
00661     throw "EpetraExt::mult_Atrans_Btrans: GlobalIndices type unknown";
00662 }
00663 
00664 // ==============================================================
00665 template<typename int_type>
00666 int import_and_extract_views(const Epetra_CrsMatrix& M,
00667            const Epetra_Map& targetMap,
00668            CrsMatrixStruct& Mview,
00669            const Epetra_Import * prototypeImporter=0)
00670 {
00671   //The goal of this method is to populate the 'Mview' struct with views of the
00672   //rows of M, including all rows that correspond to elements in 'targetMap'.
00673   //
00674   //If targetMap includes local elements that correspond to remotely-owned rows
00675   //of M, then those remotely-owned rows will be imported into
00676   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00677 
00678   // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.  
00679   // The typical use of this is to provide A's column map when I&XV is called for B, for
00680   // a C = A * B multiply.  
00681 
00682 #ifdef ENABLE_MMM_TIMINGS
00683   using Teuchos::TimeMonitor;
00684   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Alloc")));
00685 #endif
00686   Mview.deleteContents();
00687 
00688   Mview.origMatrix          = &M;
00689   const Epetra_Map& Mrowmap = M.RowMap();
00690   int numProcs              = Mrowmap.Comm().NumProc();
00691   Mview.numRows             = targetMap.NumMyElements();
00692   int_type* Mrows           = 0;
00693   targetMap.MyGlobalElementsPtr(Mrows);
00694 
00695   if (Mview.numRows > 0) {
00696     Mview.numEntriesPerRow = new int[Mview.numRows];
00697     Mview.indices = new int*[Mview.numRows];
00698     Mview.values  = new double*[Mview.numRows];
00699     Mview.remote  = new bool[Mview.numRows];
00700   }
00701 
00702   Mview.origRowMap   = &(M.RowMap());
00703   Mview.rowMap       = &targetMap;
00704   Mview.colMap       = &(M.ColMap());
00705   Mview.domainMap    = &(M.DomainMap());
00706   Mview.importColMap = NULL;
00707   Mview.numRemote    = 0;
00708 
00709 
00710 #ifdef ENABLE_MMM_TIMINGS
00711   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Extract")));
00712 #endif
00713 
00714   int i;
00715   int *rowptr=0, *colind=0;
00716   double *vals=0;
00717 
00718   EPETRA_CHK_ERR( M.ExtractCrsDataPointers(rowptr,colind,vals) );
00719 
00720   if(Mrowmap.SameAs(targetMap)) {
00721     // Short Circuit: The Row and Target Maps are the Same
00722     for(i=0; i<Mview.numRows; ++i) {
00723       Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
00724       Mview.indices[i]          = colind + rowptr[i];
00725       Mview.values[i]           = vals + rowptr[i];
00726       Mview.remote[i]           = false;
00727     }
00728     return 0;
00729   }
00730   else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
00731     // The prototypeImporter can tell me what is local and what is remote
00732     int * PermuteToLIDs   = prototypeImporter->PermuteToLIDs();
00733     int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
00734     int * RemoteLIDs      = prototypeImporter->RemoteLIDs();
00735     for(int i=0; i<prototypeImporter->NumSameIDs();i++){    
00736       Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
00737       Mview.indices[i]          = colind + rowptr[i];
00738       Mview.values[i]           = vals + rowptr[i];
00739       Mview.remote[i]           = false;
00740     }
00741     for(int i=0; i<prototypeImporter->NumPermuteIDs();i++){
00742       int to                     = PermuteToLIDs[i];
00743       int from                   = PermuteFromLIDs[i];
00744       Mview.numEntriesPerRow[to] = rowptr[from+1]-rowptr[from];
00745       Mview.indices[to]          = colind + rowptr[from];
00746       Mview.values[to]           = vals + rowptr[from];
00747       Mview.remote[to]           = false;
00748     }
00749     for(int i=0; i<prototypeImporter->NumRemoteIDs();i++){
00750       Mview.remote[RemoteLIDs[i]] = true;
00751       ++Mview.numRemote;
00752     }
00753   }
00754   else {
00755     // Only LID can tell me who is local and who is remote
00756     for(i=0; i<Mview.numRows; ++i) {
00757       int mlid = Mrowmap.LID(Mrows[i]);
00758       if (mlid < 0) {
00759   Mview.remote[i] = true;
00760   ++Mview.numRemote;
00761       }
00762       else {
00763   Mview.numEntriesPerRow[i] = rowptr[mlid+1]-rowptr[mlid];
00764   Mview.indices[i]          = colind + rowptr[mlid];
00765   Mview.values[i]           = vals + rowptr[mlid];
00766   Mview.remote[i]           = false;
00767       }
00768     }
00769   }
00770 
00771 #ifdef ENABLE_MMM_TIMINGS
00772   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Collective-0")));
00773 #endif
00774 
00775   if (numProcs < 2) {
00776     if (Mview.numRemote > 0) {
00777       std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00778      << "attempting to import remote matrix rows."<<std::endl;
00779       return(-1);
00780     }
00781 
00782     //If only one processor we don't need to import any remote rows, so return.
00783     return(0);
00784   }
00785 
00786   //
00787   //Now we will import the needed remote rows of M, if the global maximum
00788   //value of numRemote is greater than 0.
00789   //
00790 
00791   int globalMaxNumRemote = 0;
00792   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00793 
00794   if (globalMaxNumRemote > 0) {
00795 #ifdef ENABLE_MMM_TIMINGS
00796     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-1")));
00797 #endif
00798     //Create a map that describes the remote rows of M that we need.
00799 
00800     int_type* MremoteRows = Mview.numRemote>0 ? new int_type[Mview.numRemote] : NULL;
00801     int offset = 0;
00802     for(i=0; i<Mview.numRows; ++i) {
00803       if (Mview.remote[i]) {
00804   MremoteRows[offset++] = Mrows[i];
00805       }
00806     }
00807 
00808   LightweightMap MremoteRowMap((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64());
00809 
00810 #ifdef ENABLE_MMM_TIMINGS
00811     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-2")));
00812 #endif
00813     //Create an importer with target-map MremoteRowMap and 
00814     //source-map Mrowmap.
00815     Epetra_Import    * importer=0;
00816     RemoteOnlyImport * Rimporter=0;
00817     if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)) {
00818       Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
00819     }
00820     else if(!prototypeImporter) {
00821       Epetra_Map MremoteRowMap2((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64(), Mrowmap.Comm());
00822       importer=new Epetra_Import(MremoteRowMap2, Mrowmap);
00823     }
00824     else
00825       throw std::runtime_error("prototypeImporter->SourceMap() does not match M.RowMap()!");
00826 
00827 
00828 #ifdef ENABLE_MMM_TIMINGS
00829     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-3")));
00830 #endif
00831 
00832     if(Mview.importMatrix) delete Mview.importMatrix;
00833     if(Rimporter) Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
00834     else Mview.importMatrix = new LightweightCrsMatrix(M,*importer);
00835 
00836 #ifdef ENABLE_MMM_TIMINGS
00837     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-4")));
00838 #endif
00839 
00840     //Finally, use the freshly imported data to fill in the gaps in our views
00841     int N;
00842     if(Mview.importMatrix->use_lw) N = Mview.importMatrix->RowMapLW_->NumMyElements();
00843     else N = Mview.importMatrix->RowMapEP_->NumMyElements();
00844 
00845     if(N > 0) {
00846       rowptr = &Mview.importMatrix->rowptr_[0];
00847       colind = &Mview.importMatrix->colind_[0];
00848       vals   = &Mview.importMatrix->vals_[0];
00849     }
00850 
00851 
00852     for(i=0; i<Mview.numRows; ++i) {
00853       if (Mview.remote[i]) {
00854   int importLID = MremoteRowMap.LID(Mrows[i]);
00855   Mview.numEntriesPerRow[i] = rowptr[importLID+1]-rowptr[importLID];
00856   Mview.indices[i]          = colind + rowptr[importLID];
00857   Mview.values[i]           = vals + rowptr[importLID];
00858       }
00859     }
00860 
00861 
00862     int_type * MyColGIDs = 0;
00863   if(Mview.importMatrix->ColMap_.NumMyElements()>0)
00864     Mview.importMatrix->ColMap_.MyGlobalElementsPtr(MyColGIDs);
00865     Mview.importColMap = new Epetra_Map((int_type) -1,Mview.importMatrix->ColMap_.NumMyElements(),MyColGIDs,(int_type) Mview.importMatrix->ColMap_.IndexBase64(),M.Comm());
00866     delete [] MremoteRows;
00867 #ifdef ENABLE_MMM_TIMINGS
00868     MM=Teuchos::null;
00869 #endif   
00870 
00871     // Cleanup
00872     delete Rimporter;
00873     delete importer;
00874 
00875   }
00876   return(0);
00877 }
00878 
00879 
00880 
00881 
00882 // ==============================================================
00883 template<typename int_type>
00884 int import_only(const Epetra_CrsMatrix& M,
00885     const Epetra_Map& targetMap,
00886     CrsMatrixStruct& Mview,
00887     const Epetra_Import * prototypeImporter=0)
00888 {
00889   // The goal of this method to populare the Mview object with ONLY the rows of M
00890   // that correspond to elements in 'targetMap.'  There will be no population of the
00891   // numEntriesPerRow, indices, values, remote or numRemote arrays.
00892 
00893 
00894   // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.  
00895   // The typical use of this is to provide A's column map when I&XV is called for B, for
00896   // a C = A * B multiply.  
00897 
00898 #ifdef ENABLE_MMM_TIMINGS
00899   using Teuchos::TimeMonitor;
00900   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Setup")));
00901 #endif
00902 
00903   Mview.deleteContents();
00904 
00905   Mview.origMatrix          = &M;
00906   const Epetra_Map& Mrowmap = M.RowMap();
00907   int numProcs              = Mrowmap.Comm().NumProc();
00908   Mview.numRows             = targetMap.NumMyElements();
00909 
00910   Mview.origRowMap   = &(M.RowMap());
00911   Mview.rowMap       = &targetMap;
00912   Mview.colMap       = &(M.ColMap());
00913   Mview.domainMap    = &(M.DomainMap());
00914   Mview.importColMap = NULL;
00915 
00916   int i;
00917   int numRemote =0;
00918   int N = Mview.rowMap->NumMyElements();
00919 
00920   if(Mrowmap.SameAs(targetMap)) {
00921     numRemote = 0;
00922     Mview.targetMapToOrigRow.resize(N); 
00923     for(i=0;i<N; i++) Mview.targetMapToOrigRow[i]=i;
00924     return 0;
00925   }
00926   else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
00927     numRemote = prototypeImporter->NumRemoteIDs();
00928 
00929     Mview.targetMapToOrigRow.resize(N);    Mview.targetMapToOrigRow.assign(N,-1);
00930     Mview.targetMapToImportRow.resize(N);  Mview.targetMapToImportRow.assign(N,-1);
00931 
00932     const int * PermuteToLIDs   = prototypeImporter->PermuteToLIDs();
00933     const int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
00934     const int * RemoteLIDs      = prototypeImporter->RemoteLIDs();
00935 
00936     for(i=0; i<prototypeImporter->NumSameIDs();i++)
00937       Mview.targetMapToOrigRow[i] = i;
00938 
00939     // NOTE: These are reversed on purpose since we're doing a revere map.
00940     for(i=0; i<prototypeImporter->NumPermuteIDs();i++)
00941       Mview.targetMapToOrigRow[PermuteToLIDs[i]] = PermuteFromLIDs[i];
00942     
00943     for(i=0; i<prototypeImporter->NumRemoteIDs();i++)
00944       Mview.targetMapToImportRow[RemoteLIDs[i]] = i;
00945 
00946   }
00947   else
00948     throw "import_only: This routine only works if you either have the right map or no prototypeImporter";
00949 
00950   if (numProcs < 2) {
00951     if (Mview.numRemote > 0) {
00952       std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00953      << "attempting to import remote matrix rows."<<std::endl;
00954       return(-1);
00955     }
00956 
00957     //If only one processor we don't need to import any remote rows, so return.
00958     return(0);
00959   }
00960 
00961 #ifdef ENABLE_MMM_TIMINGS
00962   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-1")));
00963 #endif
00964   const int * RemoteLIDs = prototypeImporter->RemoteLIDs();
00965   
00966     //Create a map that describes the remote rows of M that we need.
00967   int_type* MremoteRows = numRemote>0 ? new int_type[prototypeImporter->NumRemoteIDs()] : 0;
00968   for(i=0; i<prototypeImporter->NumRemoteIDs(); i++)
00969     MremoteRows[i] = (int_type) targetMap.GID64(RemoteLIDs[i]);
00970   
00971   LightweightMap MremoteRowMap((int_type) -1, numRemote, MremoteRows, (int_type)Mrowmap.IndexBase64());
00972 
00973 #ifdef ENABLE_MMM_TIMINGS
00974   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-2")));
00975 #endif
00976   //Create an importer with target-map MremoteRowMap and 
00977   //source-map Mrowmap.
00978   RemoteOnlyImport * Rimporter=0;
00979   Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
00980   
00981 #ifdef ENABLE_MMM_TIMINGS
00982   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-3")));
00983 #endif
00984   
00985   Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
00986   
00987 #ifdef ENABLE_MMM_TIMINGS
00988   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-4")));
00989 #endif
00990 
00991   // Cleanup
00992   delete Rimporter;
00993   delete [] MremoteRows;
00994 
00995   return(0);
00996 }
00997 
00998 
00999 
01000 
01001 //=========================================================================
01002 template<typename int_type>
01003 int form_map_union(const Epetra_Map* map1,
01004        const Epetra_Map* map2,
01005        const Epetra_Map*& mapunion)
01006 {
01007   //form the union of two maps
01008 
01009   if (map1 == NULL) {
01010     mapunion = new Epetra_Map(*map2);
01011     return(0);
01012   }
01013 
01014   if (map2 == NULL) {
01015     mapunion = new Epetra_Map(*map1);
01016     return(0);
01017   }
01018 
01019   int map1_len       = map1->NumMyElements();
01020   int_type* map1_elements = 0;
01021   map1->MyGlobalElementsPtr(map1_elements);
01022   int map2_len       = map2->NumMyElements();
01023   int_type* map2_elements = 0;
01024   map2->MyGlobalElementsPtr(map2_elements);
01025 
01026   int_type* union_elements = new int_type[map1_len+map2_len];
01027 
01028   int map1_offset = 0, map2_offset = 0, union_offset = 0;
01029 
01030   while(map1_offset < map1_len && map2_offset < map2_len) {
01031     int_type map1_elem = map1_elements[map1_offset];
01032     int_type map2_elem = map2_elements[map2_offset];
01033 
01034     if (map1_elem < map2_elem) {
01035       union_elements[union_offset++] = map1_elem;
01036       ++map1_offset;
01037     }
01038     else if (map1_elem > map2_elem) {
01039       union_elements[union_offset++] = map2_elem;
01040       ++map2_offset;
01041     }
01042     else {
01043       union_elements[union_offset++] = map1_elem;
01044       ++map1_offset;
01045       ++map2_offset;
01046     }
01047   }
01048 
01049   int i;
01050   for(i=map1_offset; i<map1_len; ++i) {
01051     union_elements[union_offset++] = map1_elements[i];
01052   }
01053 
01054   for(i=map2_offset; i<map2_len; ++i) {
01055     union_elements[union_offset++] = map2_elements[i];
01056   }
01057 
01058   mapunion = new Epetra_Map((int_type) -1, union_offset, union_elements,
01059           (int_type) map1->IndexBase64(), map1->Comm());
01060 
01061   delete [] union_elements;
01062 
01063   return(0);
01064 }
01065 
01066 //=========================================================================
01067 template<typename int_type>
01068 Epetra_Map* Tfind_rows_containing_cols(const Epetra_CrsMatrix& M,
01069                                       const Epetra_Map& column_map)
01070 {
01071   //The goal of this function is to find all rows in the matrix M that contain
01072   //column-indices which are in 'column_map'. A map containing those rows is
01073   //returned. (The returned map will then be used as the source row-map for
01074   //importing those rows into an overlapping distribution.)
01075 
01076   std::pair<int_type,int_type> my_col_range = get_col_range<int_type>(column_map);
01077 
01078   const Epetra_Comm& Comm = M.RowMap().Comm();
01079   int num_procs = Comm.NumProc();
01080   int my_proc = Comm.MyPID();
01081   std::vector<int> send_procs;
01082   send_procs.reserve(num_procs-1);
01083   std::vector<int_type> col_ranges;
01084   col_ranges.reserve(2*(num_procs-1));
01085   for(int p=0; p<num_procs; ++p) {
01086     if (p == my_proc) continue;
01087     send_procs.push_back(p);
01088     col_ranges.push_back(my_col_range.first);
01089     col_ranges.push_back(my_col_range.second);
01090   }
01091 
01092   Epetra_Distributor* distor = Comm.CreateDistributor();
01093 
01094   int num_recv_procs = 0;
01095   int num_send_procs = send_procs.size();
01096   bool deterministic = true;
01097   int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL;
01098   distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs);
01099 
01100   int len_import_chars = 0;
01101   char* import_chars = NULL;
01102 
01103   char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL;
01104   int err = distor->Do(export_chars, 2*sizeof(int_type), len_import_chars, import_chars);
01105   if (err != 0) {
01106     std::cout << "ERROR returned from Distributor::Do."<<std::endl;
01107   }
01108  
01109   int_type* IntImports = reinterpret_cast<int_type*>(import_chars);
01110   int num_import_pairs = len_import_chars/(2*sizeof(int_type));
01111   int offset = 0;
01112   std::vector<int> send_procs2;
01113   send_procs2.reserve(num_procs);
01114   std::vector<int_type> proc_col_ranges;
01115   std::pair<int_type,int_type> M_col_range = get_col_range<int_type>(M);
01116   for(int i=0; i<num_import_pairs; ++i) {
01117     int_type first_col = IntImports[offset++];
01118     int_type last_col =  IntImports[offset++];
01119     if (first_col <= M_col_range.second && last_col >= M_col_range.first) {
01120       send_procs2.push_back(send_procs[i]);
01121       proc_col_ranges.push_back(first_col);
01122       proc_col_ranges.push_back(last_col);
01123     }
01124   }
01125 
01126   std::vector<int_type> send_rows;
01127   std::vector<int> rows_per_send_proc;
01128   pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc);
01129 
01130   Epetra_Distributor* distor2 = Comm.CreateDistributor();
01131 
01132   int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL;
01133   num_recv_procs = 0;
01134   err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs);
01135 
01136   export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL;
01137   int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL;
01138   len_import_chars = 0;
01139   err = distor2->Do(export_chars, (int)sizeof(int_type), rows_per_send_proc_ptr, len_import_chars, import_chars);
01140 
01141   int_type* recvd_row_ints = reinterpret_cast<int_type*>(import_chars);
01142   int num_recvd_rows = len_import_chars/sizeof(int_type);
01143 
01144   Epetra_Map recvd_rows((int_type) -1, num_recvd_rows, recvd_row_ints, (int_type) column_map.IndexBase64(), Comm);
01145 
01146   delete distor;
01147   delete distor2;
01148   delete [] import_chars;
01149 
01150   Epetra_Map* result_map = NULL;
01151 
01152   err = form_map_union<int_type>(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map);
01153   if (err != 0) {
01154     return(NULL);
01155   }
01156 
01157   return(result_map);
01158 }
01159 
01160 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
01161                                       const Epetra_Map& column_map)
01162 {
01163 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01164   if(M.RowMatrixRowMap().GlobalIndicesInt() && column_map.GlobalIndicesInt()) {
01165     return Tfind_rows_containing_cols<int>(M, column_map);
01166   }
01167   else
01168 #endif
01169 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01170   if(M.RowMatrixRowMap().GlobalIndicesLongLong() && column_map.GlobalIndicesLongLong()) {
01171     return Tfind_rows_containing_cols<long long>(M, column_map);
01172   }
01173   else
01174 #endif
01175     throw "EpetraExt::find_rows_containing_cols: GlobalIndices type unknown";
01176 }
01177 
01178 //=========================================================================
01179 template<typename int_type>
01180 int MatrixMatrix::TMultiply(const Epetra_CrsMatrix& A,
01181          bool transposeA,
01182          const Epetra_CrsMatrix& B,
01183          bool transposeB,
01184          Epetra_CrsMatrix& C,
01185                            bool call_FillComplete_on_result)
01186 {
01187   // DEBUG
01188   //  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
01189 #ifdef ENABLE_MMM_TIMINGS
01190   using Teuchos::TimeMonitor;
01191   Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Setup")));
01192 #endif
01193   // end DEBUG
01194 
01195   //
01196   //This method forms the matrix-matrix product C = op(A) * op(B), where
01197   //op(A) == A   if transposeA is false,
01198   //op(A) == A^T if transposeA is true,
01199   //and similarly for op(B).
01200   //
01201 
01202   //A and B should already be Filled.
01203   //(Should we go ahead and call FillComplete() on them if necessary?
01204   // or error out? For now, we choose to error out.)
01205   if (!A.Filled() || !B.Filled()) {
01206     EPETRA_CHK_ERR(-1);
01207   }
01208 
01209   //We're going to refer to the different combinations of op(A) and op(B)
01210   //as scenario 1 through 4.
01211 
01212   int scenario = 1;//A*B
01213   if (transposeB && !transposeA) scenario = 2;//A*B^T
01214   if (transposeA && !transposeB) scenario = 3;//A^T*B
01215   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
01216 
01217   //now check size compatibility
01218   long long Aouter = transposeA ? A.NumGlobalCols64() : A.NumGlobalRows64();
01219   long long Bouter = transposeB ? B.NumGlobalRows64() : B.NumGlobalCols64();
01220   long long Ainner = transposeA ? A.NumGlobalRows64() : A.NumGlobalCols64();
01221   long long Binner = transposeB ? B.NumGlobalCols64() : B.NumGlobalRows64();
01222   if (Ainner != Binner) {
01223     std::cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01224          << "must match for matrix-matrix product. op(A) is "
01225          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl;
01226     return(-1);
01227   }
01228 
01229   //The result matrix C must at least have a row-map that reflects the
01230   //correct row-size. Don't check the number of columns because rectangular
01231   //matrices which were constructed with only one map can still end up
01232   //having the correct capacity and dimensions when filled.
01233   if (Aouter > C.NumGlobalRows64()) {
01234     std::cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01235          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows64()
01236          << " rows, should have at least "<<Aouter << std::endl;
01237     return(-1);
01238   }
01239 
01240   //It doesn't matter whether C is already Filled or not. If it is already
01241   //Filled, it must have space allocated for the positions that will be
01242   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01243   //we'll error out later when trying to store result values.
01244 
01245   //We're going to need to import remotely-owned sections of A and/or B
01246   //if more than 1 processor is performing this run, depending on the scenario.
01247   int numProcs = A.Comm().NumProc();
01248 
01249   //If we are to use the transpose of A and/or B, we'll need to be able to 
01250   //access, on the local processor, all rows that contain column-indices in
01251   //the domain-map.
01252   const Epetra_Map* domainMap_A = &(A.DomainMap());
01253   const Epetra_Map* domainMap_B = &(B.DomainMap());
01254 
01255   const Epetra_Map* rowmap_A = &(A.RowMap());
01256   const Epetra_Map* rowmap_B = &(B.RowMap());
01257 
01258   //Declare some 'work-space' maps which may be created depending on
01259   //the scenario, and which will be deleted before exiting this function.
01260   const Epetra_Map* workmap1 = NULL;
01261   const Epetra_Map* workmap2 = NULL;
01262   const Epetra_Map* mapunion1 = NULL;
01263 
01264   //Declare a couple of structs that will be used to hold views of the data
01265   //of A and B, to be used for fast access during the matrix-multiplication.
01266   CrsMatrixStruct Aview;
01267   CrsMatrixStruct Bview;
01268 
01269   const Epetra_Map* targetMap_A = rowmap_A;
01270   const Epetra_Map* targetMap_B = rowmap_B;
01271 
01272 #ifdef ENABLE_MMM_TIMINGS
01273   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All I&X")));
01274 #endif
01275   if (numProcs > 1) {
01276     //If op(A) = A^T, find all rows of A that contain column-indices in the
01277     //local portion of the domain-map. (We'll import any remote rows
01278     //that fit this criteria onto the local processor.)
01279     if (transposeA) {
01280       workmap1 = Tfind_rows_containing_cols<int_type>(A, *domainMap_A);
01281       targetMap_A = workmap1;
01282     }
01283   }
01284 
01285   //Now import any needed remote rows and populate the Aview struct.
01286   if(scenario==1 && call_FillComplete_on_result) {
01287     EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
01288   }
01289   else  {
01290     EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
01291   }
01292 
01293 
01294   // NOTE:  Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
01295   // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.  
01296   
01297 
01298   // Make sure B's views are consistent with A even in serial.
01299   const Epetra_Map* colmap_op_A = NULL;
01300   if(scenario==1 || numProcs > 1){
01301     if (transposeA) {
01302       colmap_op_A = targetMap_A;
01303     }
01304     else {
01305       colmap_op_A = &(A.ColMap());
01306     }
01307     targetMap_B = colmap_op_A;
01308   }
01309 
01310   if (numProcs > 1) {
01311     //If op(B) = B^T, find all rows of B that contain column-indices in the
01312     //local-portion of the domain-map, or in the column-map of op(A).
01313     //We'll import any remote rows that fit this criteria onto the
01314     //local processor.
01315     if (transposeB) {
01316       EPETRA_CHK_ERR( form_map_union<int_type>(colmap_op_A, domainMap_B, mapunion1) );
01317       workmap2 = Tfind_rows_containing_cols<int_type>(B, *mapunion1);
01318       targetMap_B = workmap2;
01319     }
01320   }
01321 
01322   //Now import any needed remote rows and populate the Bview struct.  
01323   if(scenario==1 && call_FillComplete_on_result) {
01324     EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
01325   }
01326   else {
01327     EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
01328   }
01329 
01330 #ifdef ENABLE_MMM_TIMINGS
01331   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Multiply")));
01332 #endif
01333 
01334   // Zero if filled
01335   if(C.Filled()) C.PutScalar(0.0);
01336 
01337   //Now call the appropriate method to perform the actual multiplication.
01338   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01339 
01340   switch(scenario) {
01341   case 1:    EPETRA_CHK_ERR( mult_A_B(A,Aview,B,Bview,C,call_FillComplete_on_result) );
01342     break;
01343   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
01344     break;
01345   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
01346     break;
01347   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
01348     break;
01349   }
01350 
01351 
01352   if (scenario != 1 && call_FillComplete_on_result) {
01353     //We'll call FillComplete on the C matrix before we exit, and give
01354     //it a domain-map and a range-map.
01355     //The domain-map will be the domain-map of B, unless
01356     //op(B)==transpose(B), in which case the range-map of B will be used.
01357     //The range-map will be the range-map of A, unless
01358     //op(A)==transpose(A), in which case the domain-map of A will be used.
01359 
01360     const Epetra_Map* domainmap =
01361       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01362 
01363     const Epetra_Map* rangemap =
01364       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01365 
01366     if (!C.Filled()) {
01367       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01368     }
01369   }
01370 
01371   //Finally, delete the objects that were potentially created
01372   //during the course of importing remote sections of A and B.
01373 
01374   delete mapunion1; mapunion1 = NULL;
01375   delete workmap1; workmap1 = NULL;
01376   delete workmap2; workmap2 = NULL;
01377 
01378   return(0);
01379 }
01380 
01381 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01382          bool transposeA,
01383          const Epetra_CrsMatrix& B,
01384          bool transposeB,
01385          Epetra_CrsMatrix& C,
01386                            bool call_FillComplete_on_result)
01387 {
01388 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01389   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01390   return TMultiply<int>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
01391   }
01392   else
01393 #endif
01394 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01395   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01396   return TMultiply<long long>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
01397   }
01398   else
01399 #endif
01400     throw "EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown";
01401 }
01402 
01403 //=========================================================================
01404 template<typename int_type>
01405 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
01406                       bool transposeA,
01407                       double scalarA,
01408                       Epetra_CrsMatrix& B,
01409                       double scalarB )
01410 {
01411   //
01412   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01413 
01414   //A should already be Filled. It doesn't matter whether B is
01415   //already Filled, but if it is, then its graph must already contain
01416   //all nonzero locations that will be referenced in forming the
01417   //sum.
01418 
01419   if (!A.Filled() ) {
01420     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01421     EPETRA_CHK_ERR(-1);
01422   }
01423 
01424   //explicit tranpose A formed as necessary
01425   Epetra_CrsMatrix * Aprime = 0;
01426   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01427   if( transposeA )
01428   {
01429     Atrans = new EpetraExt::RowMatrix_Transpose();
01430     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01431   }
01432   else
01433     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01434 
01435   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01436   int A_NumEntries, B_NumEntries;
01437   int_type * A_Indices = new int_type[MaxNumEntries];
01438   double * A_Values = new double[MaxNumEntries];
01439   int* B_Indices_local;
01440   int_type* B_Indices_global;
01441   double* B_Values;
01442 
01443   int NumMyRows = B.NumMyRows();
01444   int_type Row;
01445   int err;
01446   int ierr = 0;
01447 
01448   if( scalarA )
01449   {
01450     //Loop over B's rows and sum into
01451     for( int i = 0; i < NumMyRows; ++i )
01452     {
01453       Row = (int_type) B.GRID64(i);
01454       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01455 
01456       if (scalarB != 1.0) {
01457         if (!B.Filled()) {
01458           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01459                                                   B_Values, B_Indices_global));
01460         }
01461         else {
01462           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01463                                               B_Values, B_Indices_local));
01464         }
01465 
01466         for(int jj=0; jj<B_NumEntries; ++jj) {
01467           B_Values[jj] = scalarB*B_Values[jj];
01468         }
01469       }
01470 
01471       if( scalarA != 1.0 ) {
01472         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01473       }
01474 
01475       if( B.Filled() ) {//Sum In Values
01476         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01477         assert( err >= 0 );
01478         if (err < 0) ierr = err;
01479       }
01480       else {
01481         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01482         assert( err == 0 || err == 1 || err == 3 );
01483         if (err < 0) ierr = err;
01484       }
01485     }
01486   }
01487   else {
01488     EPETRA_CHK_ERR( B.Scale(scalarB) );
01489   }
01490 
01491   delete [] A_Indices;
01492   delete [] A_Values;
01493 
01494   if( Atrans ) delete Atrans;
01495 
01496   return(ierr);
01497 }
01498 
01499 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01500                       bool transposeA,
01501                       double scalarA,
01502                       Epetra_CrsMatrix& B,
01503                       double scalarB )
01504 {
01505 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01506   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01507   return TAdd<int>(A, transposeA, scalarA, B, scalarB);
01508   }
01509   else
01510 #endif
01511 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01512   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01513   return TAdd<long long>(A, transposeA, scalarA, B, scalarB);
01514   }
01515   else
01516 #endif
01517     throw "EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown";
01518 }
01519 
01520 template<typename int_type>
01521 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
01522                       bool transposeA,
01523                       double scalarA,
01524                       const Epetra_CrsMatrix & B,
01525                       bool transposeB,
01526                       double scalarB,
01527                       Epetra_CrsMatrix * & C)
01528 {
01529   //
01530   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
01531 
01532   //A and B should already be Filled. C should be an empty pointer.
01533 
01534   if (!A.Filled() || !B.Filled() ) {
01535      std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
01536                << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
01537      EPETRA_CHK_ERR(-1);
01538   }
01539 
01540   Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
01541   EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
01542 
01543   //explicit tranpose A formed as necessary
01544   if( transposeA ) {
01545      Atrans = new EpetraExt::RowMatrix_Transpose();
01546      Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01547   }
01548   else
01549      Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01550 
01551   //explicit tranpose B formed as necessary
01552   if( transposeB ) {
01553      Btrans = new EpetraExt::RowMatrix_Transpose();
01554      Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
01555   }
01556   else
01557      Bprime = const_cast<Epetra_CrsMatrix*>(&B);
01558 
01559   // allocate or zero the new matrix
01560   if(C!=0)
01561      C->PutScalar(0.0);
01562   else
01563      C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
01564 
01565   // build arrays  for easy resuse
01566   int ierr = 0;
01567   Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
01568   double scalar[] = { scalarA, scalarB};
01569 
01570   // do a loop over each matrix to add: A reordering might be more efficient
01571   for(int k=0;k<2;k++) {
01572      int MaxNumEntries = Mat[k]->MaxNumEntries();
01573      int NumEntries;
01574      int_type * Indices = new int_type[MaxNumEntries];
01575      double * Values = new double[MaxNumEntries];
01576    
01577      int NumMyRows = Mat[k]->NumMyRows();
01578      int err;
01579      int_type Row;
01580      int ierr = 0;
01581    
01582      //Loop over rows and sum into C
01583      for( int i = 0; i < NumMyRows; ++i ) {
01584         Row = (int_type) Mat[k]->GRID64(i);
01585         EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
01586    
01587         if( scalar[k] != 1.0 )
01588            for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
01589    
01590         if(C->Filled()) { // Sum in values
01591            err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
01592            if (err < 0) ierr = err;
01593         } else { // just add it to the unfilled CRS Matrix
01594            err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
01595            if (err < 0) ierr = err;
01596         }
01597      }
01598 
01599      delete [] Indices;
01600      delete [] Values;
01601   }
01602 
01603   if( Atrans ) delete Atrans;
01604   if( Btrans ) delete Btrans;
01605 
01606   return(ierr);
01607 }
01608 
01609 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01610                       bool transposeA,
01611                       double scalarA,
01612                       const Epetra_CrsMatrix & B,
01613                       bool transposeB,
01614                       double scalarB,
01615                       Epetra_CrsMatrix * & C)
01616 {
01617 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01618   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01619   return TAdd<int>(A, transposeA, scalarA, B, transposeB, scalarB, C);
01620   }
01621   else
01622 #endif
01623 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01624   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01625   return TAdd<long long>(A, transposeA, scalarA, B, transposeB, scalarB, C);
01626   }
01627   else
01628 #endif
01629     throw "EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown";
01630 }
01631 
01632 
01633 
01634 //=========================================================================
01635 template<typename int_type>
01636 int MatrixMatrix::TJacobi(double omega,
01637         const Epetra_Vector & Dinv,
01638         const Epetra_CrsMatrix& A,
01639         const Epetra_CrsMatrix& B,
01640         Epetra_CrsMatrix& C,
01641         bool call_FillComplete_on_result)
01642 {
01643 #ifdef ENABLE_MMM_TIMINGS
01644   using Teuchos::TimeMonitor;
01645   Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Setup")));
01646 #endif
01647 
01648   //A and B should already be Filled.
01649   if (!A.Filled() || !B.Filled()) {
01650     EPETRA_CHK_ERR(-1);
01651   }
01652 
01653   //now check size compatibility
01654   long long Aouter = A.NumGlobalRows64();
01655   long long Bouter = B.NumGlobalCols64();
01656   long long Ainner = A.NumGlobalCols64();
01657   long long Binner = B.NumGlobalRows64();
01658   long long Dlen   = Dinv.GlobalLength64();
01659   if (Ainner != Binner) {
01660     std::cerr << "MatrixMatrix::Jacobi: ERROR, inner dimensions of A and B "
01661          << "must match for matrix-matrix product. A is "
01662          <<Aouter<<"x"<<Ainner << ", B is "<<Binner<<"x"<<Bouter<<std::endl;
01663     return(-1);
01664   }
01665 
01666   //The result matrix C must at least have a row-map that reflects the
01667   //correct row-size. Don't check the number of columns because rectangular
01668   //matrices which were constructed with only one map can still end up
01669   //having the correct capacity and dimensions when filled.
01670   if (Aouter > C.NumGlobalRows64()) {
01671     std::cerr << "MatrixMatrix::Jacobi: ERROR, dimensions of result C must "
01672          << "match dimensions of A * B. C has "<<C.NumGlobalRows64()
01673          << " rows, should have at least "<<Aouter << std::endl;
01674     return(-1);
01675   }
01676 
01677   // Check against the D matrix
01678   if(Dlen != Aouter) {
01679     std::cerr << "MatrixMatrix::Jacboi: ERROR, dimensions of result D must "
01680         << "match dimensions of A's rows. D has "<< Dlen
01681         << " rows, should have " << Aouter << std::endl;
01682     return(-1);
01683   }
01684   
01685   if(!A.RowMap().SameAs(B.RowMap()) || !A.RowMap().SameAs(Dinv.Map())) {
01686     std::cerr << "MatrixMatrix::Jacboi: ERROR, RowMap of A must match RowMap of B "
01687         << "and Map of D."<<std::endl;
01688     return(-1);
01689   }
01690 
01691   //It doesn't matter whether C is already Filled or not. If it is already
01692   //Filled, it must have space allocated for the positions that will be
01693   //referenced in forming C. If it doesn't have enough space,
01694   //we'll error out later when trying to store result values.
01695 
01696   //We're going to need to import remotely-owned sections of A and/or B
01697   //if more than 1 processor is performing this run, depending on the scenario.
01698   int numProcs = A.Comm().NumProc();
01699 
01700   // Maps
01701   const Epetra_Map* rowmap_A = &(A.RowMap());
01702   const Epetra_Map* rowmap_B = &(B.RowMap());
01703 
01704 
01705 
01706   //Declare some 'work-space' maps which may be created depending on
01707   //the scenario, and which will be deleted before exiting this function.
01708   const Epetra_Map* workmap1 = NULL;
01709   const Epetra_Map* workmap2 = NULL;
01710   const Epetra_Map* mapunion1 = NULL;
01711 
01712   //Declare a couple of structs that will be used to hold views of the data
01713   //of A and B, to be used for fast access during the matrix-multiplication.
01714   CrsMatrixStruct Aview;
01715   CrsMatrixStruct Bview;
01716 
01717   const Epetra_Map* targetMap_A = rowmap_A;
01718   const Epetra_Map* targetMap_B = rowmap_B;
01719 
01720 #ifdef ENABLE_MMM_TIMINGS
01721   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All I&X")));
01722 #endif
01723 
01724   //Now import any needed remote rows and populate the Aview struct.
01725   if(call_FillComplete_on_result) {
01726     EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
01727   }
01728   else  {
01729     EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
01730   }
01731 
01732   // NOTE:  Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
01733   // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.  
01734   
01735   // Make sure B's views are consistent with A even in serial.
01736   const Epetra_Map* colmap_op_A = NULL;
01737   if(numProcs > 1){
01738     colmap_op_A = &(A.ColMap());
01739     targetMap_B = colmap_op_A;
01740   }
01741 
01742   //Now import any needed remote rows and populate the Bview struct.  
01743   if(call_FillComplete_on_result) {
01744     EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
01745   }
01746   else {
01747     EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
01748   }
01749 
01750 #ifdef ENABLE_MMM_TIMINGS
01751   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Multiply")));
01752 #endif
01753 
01754   // Zero if filled
01755   if(C.Filled()) C.PutScalar(0.0);
01756 
01757   //Now call the appropriate method to perform the actual multiplication.
01758   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01759   EPETRA_CHK_ERR( jacobi_A_B(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result) );
01760 
01761   //Finally, delete the objects that were potentially created
01762   //during the course of importing remote sections of A and B.
01763   delete mapunion1; mapunion1 = NULL;
01764   delete workmap1; workmap1 = NULL;
01765   delete workmap2; workmap2 = NULL;
01766 
01767   return(0);
01768 }
01769 
01770 
01771 
01772 int MatrixMatrix::Jacobi(double omega,
01773        const Epetra_Vector & Dinv,
01774        const Epetra_CrsMatrix& A,
01775        const Epetra_CrsMatrix& B,
01776        Epetra_CrsMatrix& C,
01777        bool call_FillComplete_on_result)
01778 {
01779 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01780   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01781     return TJacobi<int>(omega, Dinv, A, B, C, call_FillComplete_on_result);
01782   }
01783   else
01784 #endif
01785 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01786   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01787     return TJacobi<long long>(omega, Dinv, A, B, C, call_FillComplete_on_result);
01788   }
01789   else
01790 #endif
01791     throw "EpetraExt::MatrixMatrix::Jacobi: GlobalIndices type unknown";
01792 }
01793 
01794 
01795 
01796 
01797 
01798 
01799 
01800 
01801 
01802 
01803 } // namespace EpetraExt
01804 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines