EpetraExt Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 //
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2011) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 
00042 #include <EpetraExt_ConfigDefs.h>
00043 #include <EpetraExt_MatrixMatrix.h>
00044 
00045 #include <EpetraExt_MMHelpers.h>
00046 
00047 #include <EpetraExt_Transpose_RowMatrix.h>
00048 
00049 #include <Epetra_Export.h>
00050 #include <Epetra_Import.h>
00051 #include <Epetra_Util.h>
00052 #include <Epetra_Map.h>
00053 #include <Epetra_Comm.h>
00054 #include <Epetra_CrsMatrix.h>
00055 #include <Epetra_Vector.h>
00056 #include <Epetra_Directory.h>
00057 #include <Epetra_HashTable.h>
00058 #include <Epetra_Distributor.h>
00059 
00060 #include <Teuchos_TimeMonitor.hpp>
00061 
00062 #ifdef HAVE_VECTOR
00063 #include <vector>
00064 #endif
00065 
00066 
00067 
00068 namespace EpetraExt {
00069 //=========================================================================
00070 //
00071 //Method for internal use... sparsedot forms a dot-product between two
00072 //sparsely-populated 'vectors'.
00073 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00074 //
00075 template<typename int_type>
00076 double sparsedot(double* u, int_type* u_ind, int u_len,
00077      double* v, int_type* v_ind, int v_len)
00078 {
00079   double result = 0.0;
00080 
00081   int v_idx = 0;
00082   int u_idx = 0;
00083 
00084   while(v_idx < v_len && u_idx < u_len) {
00085     int_type ui = u_ind[u_idx];
00086     int_type vi = v_ind[v_idx];
00087 
00088     if (ui < vi) {
00089       ++u_idx;
00090     }
00091     else if (ui > vi) {
00092       ++v_idx;
00093     }
00094     else {
00095       result += u[u_idx++]*v[v_idx++];
00096     }
00097   }
00098 
00099   return(result);
00100 }
00101 
00102 //=========================================================================
00103 //kernel method for computing the local portion of C = A*B^T
00104 template<typename int_type>
00105 int mult_A_Btrans(CrsMatrixStruct& Aview,
00106       CrsMatrixStruct& Bview,
00107       CrsWrapper& C)
00108 {
00109   int i, j, k;
00110   int returnValue = 0;
00111 
00112   int maxlen = 0;
00113   for(i=0; i<Aview.numRows; ++i) {
00114     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00115   }
00116   for(i=0; i<Bview.numRows; ++i) {
00117     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00118   }
00119 
00120   //std::cout << "Aview: " << std::endl;
00121   //dumpCrsMatrixStruct(Aview);
00122 
00123   //std::cout << "Bview: " << std::endl;
00124   //dumpCrsMatrixStruct(Bview);
00125 
00126   int numBcols = Bview.colMap->NumMyElements();
00127   int numBrows = Bview.numRows;
00128 
00129   int iworklen = maxlen*2 + numBcols;
00130   int_type* iwork = new int_type[iworklen];
00131 
00132   int_type * bcols = iwork+maxlen*2;
00133   int_type* bgids = 0;
00134   Bview.colMap->MyGlobalElementsPtr(bgids);
00135   double* bvals = new double[maxlen*2];
00136   double* avals = bvals+maxlen;
00137 
00138   int_type max_all_b = (int_type) Bview.colMap->MaxAllGID64();
00139   int_type min_all_b = (int_type) Bview.colMap->MinAllGID64();
00140 
00141   //bcols will hold the GIDs from B's column-map for fast access
00142   //during the computations below
00143   for(i=0; i<numBcols; ++i) {
00144     int blid = Bview.colMap->LID(bgids[i]);
00145     bcols[blid] = bgids[i];
00146   }
00147 
00148   //next create arrays indicating the first and last column-index in
00149   //each row of B, so that we can know when to skip certain rows below.
00150   //This will provide a large performance gain for banded matrices, and
00151   //a somewhat smaller gain for *most* other matrices.
00152   int_type* b_firstcol = new int_type[2*numBrows];
00153   int_type* b_lastcol = b_firstcol+numBrows;
00154   int_type temp;
00155   for(i=0; i<numBrows; ++i) {
00156     b_firstcol[i] = max_all_b;
00157     b_lastcol[i] = min_all_b;
00158 
00159     int Blen_i = Bview.numEntriesPerRow[i];
00160     if (Blen_i < 1) continue;
00161     int* Bindices_i = Bview.indices[i];
00162 
00163     if (Bview.remote[i]) {
00164       for(k=0; k<Blen_i; ++k) {
00165         temp = (int_type) Bview.importColMap->GID64(Bindices_i[k]);
00166         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00167         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00168       }
00169     }
00170     else {
00171       for(k=0; k<Blen_i; ++k) {
00172         temp = bcols[Bindices_i[k]];
00173         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00174         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00175       }
00176     }
00177   }
00178 
00179   Epetra_Util util;
00180 
00181   int_type* Aind = iwork;
00182   int_type* Bind = iwork+maxlen;
00183 
00184   bool C_filled = C.Filled();
00185 
00186   //To form C = A*B^T, we're going to execute this expression:
00187   //
00188   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00189   //
00190   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00191   //But it requires the use of a 'sparsedot' function (we're simply forming
00192   //dot-products with row A_i and row B_j for all i and j).
00193 
00194   //loop over the rows of A.
00195   for(i=0; i<Aview.numRows; ++i) {
00196     if (Aview.remote[i]) {
00197       continue;
00198     }
00199 
00200     int* Aindices_i = Aview.indices[i];
00201     double* Aval_i  = Aview.values[i];
00202     int A_len_i = Aview.numEntriesPerRow[i];
00203     if (A_len_i < 1) {
00204       continue;
00205     }
00206 
00207     for(k=0; k<A_len_i; ++k) {
00208       Aind[k] = (int_type) Aview.colMap->GID64(Aindices_i[k]);
00209       avals[k] = Aval_i[k];
00210     }
00211 
00212     util.Sort<int_type>(true, A_len_i, Aind, 1, &avals, 0, NULL, 0, 0);
00213 
00214     int_type mina = Aind[0];
00215     int_type maxa = Aind[A_len_i-1];
00216 
00217     if (mina > max_all_b || maxa < min_all_b) {
00218       continue;
00219     }
00220 
00221     int_type global_row = (int_type) Aview.rowMap->GID64(i);
00222 
00223     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00224     for(j=0; j<Bview.numRows; ++j) {
00225       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00226         continue;
00227       }
00228 
00229       int* Bindices_j = Bview.indices[j];
00230       int B_len_j = Bview.numEntriesPerRow[j];
00231       if (B_len_j < 1) {
00232         continue;
00233       }
00234 
00235       int_type tmp;
00236       int Blen = 0;
00237 
00238       if (Bview.remote[j]) {
00239         for(k=0; k<B_len_j; ++k) {
00240     tmp = (int_type) Bview.importColMap->GID64(Bindices_j[k]);
00241           if (tmp < mina || tmp > maxa) {
00242             continue;
00243           }
00244 
00245           bvals[Blen] = Bview.values[j][k];
00246           Bind[Blen++] = tmp;
00247   }
00248       }
00249       else {
00250         for(k=0; k<B_len_j; ++k) {
00251     tmp = bcols[Bindices_j[k]];
00252           if (tmp < mina || tmp > maxa) {
00253             continue;
00254           }
00255 
00256           bvals[Blen] = Bview.values[j][k];
00257           Bind[Blen++] = tmp;
00258   }
00259       }
00260 
00261       if (Blen < 1) {
00262         continue;
00263       }
00264 
00265       util.Sort<int_type>(true, Blen, Bind, 1, &bvals, 0, NULL, 0, 0);
00266 
00267       double C_ij = sparsedot(avals, Aind, A_len_i,
00268             bvals, Bind, Blen);
00269 
00270       if (C_ij == 0.0) {
00271   continue;
00272       }
00273       int_type global_col = (int_type) Bview.rowMap->GID64(j);
00274 
00275       int err = C_filled ?
00276         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00277         :
00278         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00279 
00280       if (err < 0) {
00281   return(err);
00282       }
00283       if (err > 0) {
00284   if (C_filled) {
00285     //C.Filled()==true, and C doesn't have all the necessary nonzero
00286           //locations, or global_row or global_col is out of range (less
00287           //than 0 or non local).
00288           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00289               << "to insert value in result matrix at position "<<global_row
00290              <<"," <<global_col<<", possibly because result matrix has a "
00291              << "column-map that doesn't include column "<<global_col
00292              <<" on this proc." <<std::endl;
00293     return(err);
00294   }
00295       }
00296     }
00297   }
00298 
00299   delete [] iwork;
00300   delete [] bvals;
00301   delete [] b_firstcol;
00302 
00303   return(returnValue);
00304 }
00305 
00306 int mult_A_Btrans(CrsMatrixStruct& Aview,
00307        CrsMatrixStruct& Bview,
00308        CrsWrapper& C)
00309 {
00310 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00311   if(Aview.rowMap->GlobalIndicesInt() &&
00312      Aview.colMap->GlobalIndicesInt() &&
00313      Aview.rowMap->GlobalIndicesInt() &&
00314      Aview.colMap->GlobalIndicesInt()) {
00315     return mult_A_Btrans<int>(Aview, Bview, C);
00316   }
00317   else
00318 #endif
00319 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00320   if(Aview.rowMap->GlobalIndicesLongLong() &&
00321      Aview.colMap->GlobalIndicesLongLong() &&
00322      Aview.rowMap->GlobalIndicesLongLong() &&
00323      Aview.colMap->GlobalIndicesLongLong()) {
00324     return mult_A_Btrans<long long>(Aview, Bview, C);
00325   }
00326   else
00327 #endif
00328     throw std::runtime_error("EpetraExt::mult_A_Btrans: GlobalIndices type unknown");
00329 }
00330 
00331 //=========================================================================
00332 //kernel method for computing the local portion of C = A^T*B
00333 template<typename int_type>
00334 int mult_Atrans_B(CrsMatrixStruct& Aview,
00335       CrsMatrixStruct& Bview,
00336       CrsWrapper& C)
00337 {
00338   int C_firstCol = Bview.colMap->MinLID();
00339   int C_lastCol = Bview.colMap->MaxLID();
00340 
00341   int C_firstCol_import = 0;
00342   int C_lastCol_import = -1;
00343 
00344   if (Bview.importColMap != NULL) {
00345     C_firstCol_import = Bview.importColMap->MinLID();
00346     C_lastCol_import = Bview.importColMap->MaxLID();
00347   }
00348 
00349   int C_numCols = C_lastCol - C_firstCol + 1;
00350   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00351 
00352   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00353 
00354   double* C_row_i = new double[C_numCols];
00355   int_type* C_colInds = new int_type[C_numCols];
00356 
00357   int i, j, k;
00358 
00359   for(j=0; j<C_numCols; ++j) {
00360     C_row_i[j] = 0.0;
00361     C_colInds[j] = 0;
00362   }
00363 
00364   //To form C = A^T*B, compute a series of outer-product updates.
00365   //
00366   // for (ith column of A^T) { 
00367   //   C_i = outer product of A^T(:,i) and B(i,:)
00368   // Where C_i is the ith matrix update,
00369   //       A^T(:,i) is the ith column of A^T, and
00370   //       B(i,:) is the ith row of B.
00371   //
00372 
00373   //dumpCrsMatrixStruct(Aview);
00374   //dumpCrsMatrixStruct(Bview);
00375   int localProc = Bview.colMap->Comm().MyPID();
00376 
00377   int_type* Arows = 0;
00378   Aview.rowMap->MyGlobalElementsPtr(Arows);
00379 
00380   bool C_filled = C.Filled();
00381 
00382   //loop over the rows of A (which are the columns of A^T).
00383   for(i=0; i<Aview.numRows; ++i) {
00384 
00385     int* Aindices_i = Aview.indices[i];
00386     double* Aval_i  = Aview.values[i];
00387 
00388     //we'll need to get the row of B corresponding to Arows[i],
00389     //where Arows[i] is the GID of A's ith row.
00390     int Bi = Bview.rowMap->LID(Arows[i]);
00391     if (Bi<0) {
00392       std::cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00393      <<Arows[i]<<" of matrix B, but doesn't have it."<<std::endl;
00394       return(-1);
00395     }
00396 
00397     int* Bcol_inds = Bview.indices[Bi];
00398     double* Bvals_i = Bview.values[Bi];
00399 
00400     //for each column-index Aj in the i-th row of A, we'll update
00401     //global-row GID(Aj) of the result matrix C. In that row of C,
00402     //we'll update column-indices given by the column-indices in the
00403     //ith row of B that we're now holding (Bcol_inds).
00404 
00405     //First create a list of GIDs for the column-indices
00406     //that we'll be updating.
00407 
00408     int Blen = Bview.numEntriesPerRow[Bi];
00409     if (Bview.remote[Bi]) {
00410       for(j=0; j<Blen; ++j) {
00411         C_colInds[j] = (int_type) Bview.importColMap->GID64(Bcol_inds[j]);
00412       }
00413     }
00414     else {
00415       for(j=0; j<Blen; ++j) {
00416         C_colInds[j] = (int_type) Bview.colMap->GID64(Bcol_inds[j]);
00417       }
00418     }
00419 
00420     //loop across the i-th row of A (column of A^T)
00421     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00422 
00423       int Aj = Aindices_i[j];
00424       double Aval = Aval_i[j];
00425 
00426       int_type global_row;
00427       if (Aview.remote[i]) {
00428   global_row = (int_type) Aview.importColMap->GID64(Aj);
00429       }
00430       else {
00431   global_row = (int_type) Aview.colMap->GID64(Aj);
00432       }
00433 
00434       if (!C.RowMap().MyGID(global_row)) {
00435         continue;
00436       }
00437 
00438       for(k=0; k<Blen; ++k) {
00439         C_row_i[k] = Aval*Bvals_i[k];
00440       }
00441 
00442       //
00443       //Now add this row-update to C.
00444       //
00445 
00446       int err = C_filled ?
00447         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00448         :
00449         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00450 
00451       if (err < 0) {
00452         return(err);
00453       }
00454       if (err > 0) {
00455         if (C_filled) {
00456           //C is Filled, and doesn't have all the necessary nonzero locations.
00457           return(err);
00458         }
00459       }
00460     }
00461   }
00462 
00463   delete [] C_row_i;
00464   delete [] C_colInds;
00465 
00466   return(0);
00467 }
00468 
00469 int mult_Atrans_B(CrsMatrixStruct& Aview,
00470        CrsMatrixStruct& Bview,
00471        CrsWrapper& C)
00472 {
00473 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00474   if(Aview.rowMap->GlobalIndicesInt() &&
00475      Aview.colMap->GlobalIndicesInt() &&
00476      Aview.rowMap->GlobalIndicesInt() &&
00477      Aview.colMap->GlobalIndicesInt()) {
00478     return mult_Atrans_B<int>(Aview, Bview, C);
00479   }
00480   else
00481 #endif
00482 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00483   if(Aview.rowMap->GlobalIndicesLongLong() &&
00484      Aview.colMap->GlobalIndicesLongLong() &&
00485      Aview.rowMap->GlobalIndicesLongLong() &&
00486      Aview.colMap->GlobalIndicesLongLong()) {
00487     return mult_Atrans_B<long long>(Aview, Bview, C);
00488   }
00489   else
00490 #endif
00491     throw std::runtime_error("EpetraExt::mult_Atrans_B: GlobalIndices type unknown");
00492 }
00493 
00494 //kernel method for computing the local portion of C = A^T*B^T
00495 template<typename int_type>
00496 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00497            CrsMatrixStruct& Bview,
00498            CrsWrapper& C)
00499 {
00500   int C_firstCol = Aview.rowMap->MinLID();
00501   int C_lastCol = Aview.rowMap->MaxLID();
00502 
00503   int C_firstCol_import = 0;
00504   int C_lastCol_import = -1;
00505 
00506   if (Aview.importColMap != NULL) {
00507     C_firstCol_import = Aview.importColMap->MinLID();
00508     C_lastCol_import = Aview.importColMap->MaxLID();
00509   }
00510 
00511   int C_numCols = C_lastCol - C_firstCol + 1;
00512   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00513 
00514   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00515 
00516   double* dwork = new double[C_numCols];
00517   int_type* iwork = new int_type[C_numCols];
00518 
00519   double* C_col_j = dwork;
00520   int_type* C_inds = iwork;
00521 
00522   //std::cout << "Aview: " << std::endl;
00523   //dumpCrsMatrixStruct(Aview);
00524 
00525   //std::cout << "Bview: " << std::endl;
00526   //dumpCrsMatrixStruct(Bview);
00527 
00528 
00529   int i, j, k;
00530 
00531   for(j=0; j<C_numCols; ++j) {
00532     C_col_j[j] = 0.0;
00533     C_inds[j] = -1;
00534   }
00535 
00536   int_type* A_col_inds = 0;
00537   Aview.colMap->MyGlobalElementsPtr(A_col_inds);
00538   int_type* A_col_inds_import = 0;
00539   if(Aview.importColMap)
00540     Aview.importColMap->MyGlobalElementsPtr(A_col_inds_import);
00541 
00542   const Epetra_Map* Crowmap = &(C.RowMap());
00543 
00544   //To form C = A^T*B^T, we're going to execute this expression:
00545   //
00546   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00547   //
00548   //Our goal, of course, is to navigate the data in A and B once, without
00549   //performing searches for column-indices, etc. In other words, we avoid
00550   //column-wise operations like the plague...
00551 
00552   int_type* Brows = 0;
00553   Bview.rowMap->MyGlobalElementsPtr(Brows);
00554 
00555   //loop over the rows of B
00556   for(j=0; j<Bview.numRows; ++j) {
00557     int* Bindices_j = Bview.indices[j];
00558     double* Bvals_j = Bview.values[j];
00559 
00560     int_type global_col = Brows[j];
00561 
00562     //loop across columns in the j-th row of B and for each corresponding
00563     //row in A, loop across columns and accumulate product
00564     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00565     //words, as we stride across B(j,:), we use selected rows in A to
00566     //calculate updates for column j of the result matrix C.
00567 
00568     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00569 
00570       int bk = Bindices_j[k];
00571       double Bval = Bvals_j[k];
00572 
00573       int_type global_k;
00574       if (Bview.remote[j]) {
00575   global_k = (int_type) Bview.importColMap->GID64(bk);
00576       }
00577       else {
00578   global_k = (int_type) Bview.colMap->GID64(bk);
00579       }
00580 
00581       //get the corresponding row in A
00582       int ak = Aview.rowMap->LID(global_k);
00583       if (ak<0) {
00584   continue;
00585       }
00586 
00587       int* Aindices_k = Aview.indices[ak];
00588       double* Avals_k = Aview.values[ak];
00589 
00590       int C_len = 0;
00591 
00592       if (Aview.remote[ak]) {
00593   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00594     C_col_j[C_len] = Avals_k[i]*Bval;
00595           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00596   }
00597       }
00598       else {
00599   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00600     C_col_j[C_len] = Avals_k[i]*Bval;
00601           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00602   }
00603       }
00604 
00605       //Now loop across the C_col_j values and put non-zeros into C.
00606 
00607       for(i=0; i < C_len ; ++i) {
00608   if (C_col_j[i] == 0.0) continue;
00609 
00610   int_type global_row = C_inds[i];
00611   if (!Crowmap->MyGID(global_row)) {
00612     continue;
00613   }
00614 
00615   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00616 
00617   if (err < 0) {
00618     return(err);
00619   }
00620   else {
00621           if (err > 0) {
00622       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00623       if (err < 0) {
00624               return(err);
00625             }
00626     }
00627   }
00628       }
00629 
00630     }
00631   }
00632 
00633   delete [] dwork;
00634   delete [] iwork;
00635 
00636   return(0);
00637 }
00638 
00639 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00640        CrsMatrixStruct& Bview,
00641        CrsWrapper& C)
00642 {
00643 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
00644   if(Aview.rowMap->GlobalIndicesInt() &&
00645      Aview.colMap->GlobalIndicesInt() &&
00646      Aview.rowMap->GlobalIndicesInt() &&
00647      Aview.colMap->GlobalIndicesInt()) {
00648     return mult_Atrans_Btrans<int>(Aview, Bview, C);
00649   }
00650   else
00651 #endif
00652 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
00653   if(Aview.rowMap->GlobalIndicesLongLong() &&
00654      Aview.colMap->GlobalIndicesLongLong() &&
00655      Aview.rowMap->GlobalIndicesLongLong() &&
00656      Aview.colMap->GlobalIndicesLongLong()) {
00657     return mult_Atrans_Btrans<long long>(Aview, Bview, C);
00658   }
00659   else
00660 #endif
00661     throw std::runtime_error("EpetraExt::mult_Atrans_Btrans: GlobalIndices type unknown");
00662 }
00663 
00664 // ==============================================================
00665 template<typename int_type>
00666 int import_and_extract_views(const Epetra_CrsMatrix& M,
00667            const Epetra_Map& targetMap,
00668            CrsMatrixStruct& Mview,
00669            const Epetra_Import * prototypeImporter=0)
00670 {
00671   //The goal of this method is to populate the 'Mview' struct with views of the
00672   //rows of M, including all rows that correspond to elements in 'targetMap'.
00673   //
00674   //If targetMap includes local elements that correspond to remotely-owned rows
00675   //of M, then those remotely-owned rows will be imported into
00676   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00677 
00678   // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.  
00679   // The typical use of this is to provide A's column map when I&XV is called for B, for
00680   // a C = A * B multiply.  
00681 
00682 #ifdef ENABLE_MMM_TIMINGS
00683   using Teuchos::TimeMonitor;
00684   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Alloc")));
00685 #endif
00686   Mview.deleteContents();
00687 
00688   Mview.origMatrix          = &M;
00689   const Epetra_Map& Mrowmap = M.RowMap();
00690   int numProcs              = Mrowmap.Comm().NumProc();
00691   Mview.numRows             = targetMap.NumMyElements();
00692   int_type* Mrows           = 0;
00693   targetMap.MyGlobalElementsPtr(Mrows);
00694 
00695   if (Mview.numRows > 0) {
00696     Mview.numEntriesPerRow = new int[Mview.numRows];
00697     Mview.indices = new int*[Mview.numRows];
00698     Mview.values  = new double*[Mview.numRows];
00699     Mview.remote  = new bool[Mview.numRows];
00700   }
00701 
00702   Mview.origRowMap   = &(M.RowMap());
00703   Mview.rowMap       = &targetMap;
00704   Mview.colMap       = &(M.ColMap());
00705   Mview.domainMap    = &(M.DomainMap());
00706   Mview.importColMap = NULL;
00707   Mview.numRemote    = 0;
00708 
00709 
00710 #ifdef ENABLE_MMM_TIMINGS
00711   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Extract")));
00712 #endif
00713 
00714   int i;
00715   int *rowptr=0, *colind=0;
00716   double *vals=0;
00717 
00718   EPETRA_CHK_ERR( M.ExtractCrsDataPointers(rowptr,colind,vals) );
00719 
00720   if(Mrowmap.SameAs(targetMap)) {
00721     // Short Circuit: The Row and Target Maps are the Same
00722     for(i=0; i<Mview.numRows; ++i) {
00723       Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
00724       Mview.indices[i]          = colind + rowptr[i];
00725       Mview.values[i]           = vals + rowptr[i];
00726       Mview.remote[i]           = false;
00727     }
00728     return 0;
00729   }
00730   else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
00731     // The prototypeImporter can tell me what is local and what is remote
00732     int * PermuteToLIDs   = prototypeImporter->PermuteToLIDs();
00733     int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
00734     int * RemoteLIDs      = prototypeImporter->RemoteLIDs();
00735     for(int i=0; i<prototypeImporter->NumSameIDs();i++){    
00736       Mview.numEntriesPerRow[i] = rowptr[i+1]-rowptr[i];
00737       Mview.indices[i]          = colind + rowptr[i];
00738       Mview.values[i]           = vals + rowptr[i];
00739       Mview.remote[i]           = false;
00740     }
00741     for(int i=0; i<prototypeImporter->NumPermuteIDs();i++){
00742       int to                     = PermuteToLIDs[i];
00743       int from                   = PermuteFromLIDs[i];
00744       Mview.numEntriesPerRow[to] = rowptr[from+1]-rowptr[from];
00745       Mview.indices[to]          = colind + rowptr[from];
00746       Mview.values[to]           = vals + rowptr[from];
00747       Mview.remote[to]           = false;
00748     }
00749     for(int i=0; i<prototypeImporter->NumRemoteIDs();i++){
00750       Mview.remote[RemoteLIDs[i]] = true;
00751       ++Mview.numRemote;
00752     }
00753   }
00754   else {
00755     // Only LID can tell me who is local and who is remote
00756     for(i=0; i<Mview.numRows; ++i) {
00757       int mlid = Mrowmap.LID(Mrows[i]);
00758       if (mlid < 0) {
00759   Mview.remote[i] = true;
00760   ++Mview.numRemote;
00761       }
00762       else {
00763   Mview.numEntriesPerRow[i] = rowptr[mlid+1]-rowptr[mlid];
00764   Mview.indices[i]          = colind + rowptr[mlid];
00765   Mview.values[i]           = vals + rowptr[mlid];
00766   Mview.remote[i]           = false;
00767       }
00768     }
00769   }
00770 
00771 #ifdef ENABLE_MMM_TIMINGS
00772   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Collective-0")));
00773 #endif
00774 
00775   if (numProcs < 2) {
00776     if (Mview.numRemote > 0) {
00777       std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00778      << "attempting to import remote matrix rows."<<std::endl;
00779       return(-1);
00780     }
00781 
00782     //If only one processor we don't need to import any remote rows, so return.
00783     return(0);
00784   }
00785 
00786   //
00787   //Now we will import the needed remote rows of M, if the global maximum
00788   //value of numRemote is greater than 0.
00789   //
00790 
00791   int globalMaxNumRemote = 0;
00792   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00793 
00794   if (globalMaxNumRemote > 0) {
00795 #ifdef ENABLE_MMM_TIMINGS
00796     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-1")));
00797 #endif
00798     //Create a map that describes the remote rows of M that we need.
00799 
00800     int_type* MremoteRows = Mview.numRemote>0 ? new int_type[Mview.numRemote] : NULL;
00801     int offset = 0;
00802     for(i=0; i<Mview.numRows; ++i) {
00803       if (Mview.remote[i]) {
00804   MremoteRows[offset++] = Mrows[i];
00805       }
00806     }
00807 
00808   LightweightMap MremoteRowMap((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64());
00809 
00810 #ifdef ENABLE_MMM_TIMINGS
00811     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-2")));
00812 #endif
00813     //Create an importer with target-map MremoteRowMap and 
00814     //source-map Mrowmap.
00815     Epetra_Import    * importer=0;
00816     RemoteOnlyImport * Rimporter=0;
00817     if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)) {
00818       Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
00819     }
00820     else if(!prototypeImporter) {
00821       Epetra_Map MremoteRowMap2((int_type) -1, Mview.numRemote, MremoteRows, (int_type) Mrowmap.IndexBase64(), Mrowmap.Comm());
00822       importer=new Epetra_Import(MremoteRowMap2, Mrowmap);
00823     }
00824     else
00825       throw std::runtime_error("prototypeImporter->SourceMap() does not match M.RowMap()!");
00826 
00827 
00828 #ifdef ENABLE_MMM_TIMINGS
00829     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-3")));
00830 #endif
00831 
00832     if(Mview.importMatrix) delete Mview.importMatrix;
00833     if(Rimporter) Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
00834     else Mview.importMatrix = new LightweightCrsMatrix(M,*importer);
00835 
00836 #ifdef ENABLE_MMM_TIMINGS
00837     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM I&X Import-4")));
00838 #endif
00839 
00840     //Finally, use the freshly imported data to fill in the gaps in our views
00841     int N;
00842     if(Mview.importMatrix->use_lw) N = Mview.importMatrix->RowMapLW_->NumMyElements();
00843     else N = Mview.importMatrix->RowMapEP_->NumMyElements();
00844 
00845     if(N > 0) {
00846       rowptr = &Mview.importMatrix->rowptr_[0];
00847       colind = &Mview.importMatrix->colind_[0];
00848       vals   = &Mview.importMatrix->vals_[0];
00849     }
00850 
00851 
00852     for(i=0; i<Mview.numRows; ++i) {
00853       if (Mview.remote[i]) {
00854   int importLID = MremoteRowMap.LID(Mrows[i]);
00855   Mview.numEntriesPerRow[i] = rowptr[importLID+1]-rowptr[importLID];
00856   Mview.indices[i]          = colind + rowptr[importLID];
00857   Mview.values[i]           = vals + rowptr[importLID];
00858       }
00859     }
00860 
00861 
00862     int_type * MyColGIDs = 0;
00863   if(Mview.importMatrix->ColMap_.NumMyElements()>0)
00864     Mview.importMatrix->ColMap_.MyGlobalElementsPtr(MyColGIDs);
00865     Mview.importColMap = new Epetra_Map((int_type) -1,Mview.importMatrix->ColMap_.NumMyElements(),MyColGIDs,(int_type) Mview.importMatrix->ColMap_.IndexBase64(),M.Comm());
00866     delete [] MremoteRows;
00867 #ifdef ENABLE_MMM_TIMINGS
00868     MM=Teuchos::null;
00869 #endif   
00870 
00871     // Cleanup
00872     delete Rimporter;
00873     delete importer;
00874 
00875   }
00876   return(0);
00877 }
00878 
00879 
00880 
00881 
00882 // ==============================================================
00883 template<typename int_type>
00884 int import_only(const Epetra_CrsMatrix& M,
00885     const Epetra_Map& targetMap,
00886     CrsMatrixStruct& Mview,
00887     const Epetra_Import * prototypeImporter=0)
00888 {
00889   // The goal of this method to populare the Mview object with ONLY the rows of M
00890   // that correspond to elements in 'targetMap.'  There will be no population of the
00891   // numEntriesPerRow, indices, values, remote or numRemote arrays.
00892 
00893 
00894   // The prototype importer, if used, has to know who owns all of the PIDs in M's rowmap.  
00895   // The typical use of this is to provide A's column map when I&XV is called for B, for
00896   // a C = A * B multiply.  
00897 
00898 #ifdef ENABLE_MMM_TIMINGS
00899   using Teuchos::TimeMonitor;
00900   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Setup")));
00901 #endif
00902 
00903   Mview.deleteContents();
00904 
00905   Mview.origMatrix          = &M;
00906   const Epetra_Map& Mrowmap = M.RowMap();
00907   int numProcs              = Mrowmap.Comm().NumProc();
00908   Mview.numRows             = targetMap.NumMyElements();
00909 
00910   Mview.origRowMap   = &(M.RowMap());
00911   Mview.rowMap       = &targetMap;
00912   Mview.colMap       = &(M.ColMap());
00913   Mview.domainMap    = &(M.DomainMap());
00914   Mview.importColMap = NULL;
00915 
00916   int i;
00917   int numRemote =0;
00918   int N = Mview.rowMap->NumMyElements();
00919 
00920   if(Mrowmap.SameAs(targetMap)) {
00921     numRemote = 0;
00922     Mview.targetMapToOrigRow.resize(N); 
00923     for(i=0;i<N; i++) Mview.targetMapToOrigRow[i]=i;
00924     return 0;
00925   }
00926   else if(prototypeImporter && prototypeImporter->SourceMap().SameAs(M.RowMap()) && prototypeImporter->TargetMap().SameAs(targetMap)){
00927     numRemote = prototypeImporter->NumRemoteIDs();
00928 
00929     Mview.targetMapToOrigRow.resize(N);    Mview.targetMapToOrigRow.assign(N,-1);
00930     Mview.targetMapToImportRow.resize(N);  Mview.targetMapToImportRow.assign(N,-1);
00931 
00932     const int * PermuteToLIDs   = prototypeImporter->PermuteToLIDs();
00933     const int * PermuteFromLIDs = prototypeImporter->PermuteFromLIDs();
00934     const int * RemoteLIDs      = prototypeImporter->RemoteLIDs();
00935 
00936     for(i=0; i<prototypeImporter->NumSameIDs();i++)
00937       Mview.targetMapToOrigRow[i] = i;
00938 
00939     // NOTE: These are reversed on purpose since we're doing a revere map.
00940     for(i=0; i<prototypeImporter->NumPermuteIDs();i++)
00941       Mview.targetMapToOrigRow[PermuteToLIDs[i]] = PermuteFromLIDs[i];
00942     
00943     for(i=0; i<prototypeImporter->NumRemoteIDs();i++)
00944       Mview.targetMapToImportRow[RemoteLIDs[i]] = i;
00945 
00946   }
00947   else
00948     throw std::runtime_error("import_only: This routine only works if you either have the right map or no prototypeImporter");
00949 
00950   if (numProcs < 2) {
00951     if (Mview.numRemote > 0) {
00952       std::cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00953      << "attempting to import remote matrix rows."<<std::endl;
00954       return(-1);
00955     }
00956 
00957     //If only one processor we don't need to import any remote rows, so return.
00958     return(0);
00959   }
00960 
00961 #ifdef ENABLE_MMM_TIMINGS
00962   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-1")));
00963 #endif
00964   const int * RemoteLIDs = prototypeImporter->RemoteLIDs();
00965   
00966     //Create a map that describes the remote rows of M that we need.
00967   int_type* MremoteRows = numRemote>0 ? new int_type[prototypeImporter->NumRemoteIDs()] : 0;
00968   for(i=0; i<prototypeImporter->NumRemoteIDs(); i++)
00969     MremoteRows[i] = (int_type) targetMap.GID64(RemoteLIDs[i]);
00970   
00971   LightweightMap MremoteRowMap((int_type) -1, numRemote, MremoteRows, (int_type)Mrowmap.IndexBase64());
00972 
00973 #ifdef ENABLE_MMM_TIMINGS
00974   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-2")));
00975 #endif
00976   //Create an importer with target-map MremoteRowMap and 
00977   //source-map Mrowmap.
00978   RemoteOnlyImport * Rimporter=0;
00979   Rimporter = new RemoteOnlyImport(*prototypeImporter,MremoteRowMap);
00980   
00981 #ifdef ENABLE_MMM_TIMINGS
00982   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-3")));
00983 #endif
00984   
00985   Mview.importMatrix = new LightweightCrsMatrix(M,*Rimporter);
00986   
00987 #ifdef ENABLE_MMM_TIMINGS
00988   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Ionly Import-4")));
00989 #endif
00990 
00991   // Cleanup
00992   delete Rimporter;
00993   delete [] MremoteRows;
00994 
00995   return(0);
00996 }
00997 
00998 
00999 
01000 
01001 //=========================================================================
01002 template<typename int_type>
01003 int form_map_union(const Epetra_Map* map1,
01004        const Epetra_Map* map2,
01005        const Epetra_Map*& mapunion)
01006 {
01007   //form the union of two maps
01008 
01009   if (map1 == NULL) {
01010     mapunion = new Epetra_Map(*map2);
01011     return(0);
01012   }
01013 
01014   if (map2 == NULL) {
01015     mapunion = new Epetra_Map(*map1);
01016     return(0);
01017   }
01018 
01019   int map1_len       = map1->NumMyElements();
01020   int_type* map1_elements = 0;
01021   map1->MyGlobalElementsPtr(map1_elements);
01022   int map2_len       = map2->NumMyElements();
01023   int_type* map2_elements = 0;
01024   map2->MyGlobalElementsPtr(map2_elements);
01025 
01026   int_type* union_elements = new int_type[map1_len+map2_len];
01027 
01028   int map1_offset = 0, map2_offset = 0, union_offset = 0;
01029 
01030   while(map1_offset < map1_len && map2_offset < map2_len) {
01031     int_type map1_elem = map1_elements[map1_offset];
01032     int_type map2_elem = map2_elements[map2_offset];
01033 
01034     if (map1_elem < map2_elem) {
01035       union_elements[union_offset++] = map1_elem;
01036       ++map1_offset;
01037     }
01038     else if (map1_elem > map2_elem) {
01039       union_elements[union_offset++] = map2_elem;
01040       ++map2_offset;
01041     }
01042     else {
01043       union_elements[union_offset++] = map1_elem;
01044       ++map1_offset;
01045       ++map2_offset;
01046     }
01047   }
01048 
01049   int i;
01050   for(i=map1_offset; i<map1_len; ++i) {
01051     union_elements[union_offset++] = map1_elements[i];
01052   }
01053 
01054   for(i=map2_offset; i<map2_len; ++i) {
01055     union_elements[union_offset++] = map2_elements[i];
01056   }
01057 
01058   mapunion = new Epetra_Map((int_type) -1, union_offset, union_elements,
01059           (int_type) map1->IndexBase64(), map1->Comm());
01060 
01061   delete [] union_elements;
01062 
01063   return(0);
01064 }
01065 
01066 //=========================================================================
01067 template<typename int_type>
01068 Epetra_Map* Tfind_rows_containing_cols(const Epetra_CrsMatrix& M,
01069                                       const Epetra_Map& column_map)
01070 {
01071   //The goal of this function is to find all rows in the matrix M that contain
01072   //column-indices which are in 'column_map'. A map containing those rows is
01073   //returned. (The returned map will then be used as the source row-map for
01074   //importing those rows into an overlapping distribution.)
01075 
01076   std::pair<int_type,int_type> my_col_range = get_col_range<int_type>(column_map);
01077 
01078   const Epetra_Comm& Comm = M.RowMap().Comm();
01079   int num_procs = Comm.NumProc();
01080   int my_proc = Comm.MyPID();
01081   std::vector<int> send_procs;
01082   send_procs.reserve(num_procs-1);
01083   std::vector<int_type> col_ranges;
01084   col_ranges.reserve(2*(num_procs-1));
01085   for(int p=0; p<num_procs; ++p) {
01086     if (p == my_proc) continue;
01087     send_procs.push_back(p);
01088     col_ranges.push_back(my_col_range.first);
01089     col_ranges.push_back(my_col_range.second);
01090   }
01091 
01092   Epetra_Distributor* distor = Comm.CreateDistributor();
01093 
01094   int num_recv_procs = 0;
01095   int num_send_procs = send_procs.size();
01096   bool deterministic = true;
01097   int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL;
01098   distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs);
01099 
01100   int len_import_chars = 0;
01101   char* import_chars = NULL;
01102 
01103   char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL;
01104   int err = distor->Do(export_chars, 2*sizeof(int_type), len_import_chars, import_chars);
01105   if (err != 0) {
01106     std::cout << "ERROR returned from Distributor::Do."<<std::endl;
01107   }
01108  
01109   int_type* IntImports = reinterpret_cast<int_type*>(import_chars);
01110   int num_import_pairs = len_import_chars/(2*sizeof(int_type));
01111   int offset = 0;
01112   std::vector<int> send_procs2;
01113   send_procs2.reserve(num_procs);
01114   std::vector<int_type> proc_col_ranges;
01115   std::pair<int_type,int_type> M_col_range = get_col_range<int_type>(M);
01116   for(int i=0; i<num_import_pairs; ++i) {
01117     int_type first_col = IntImports[offset++];
01118     int_type last_col =  IntImports[offset++];
01119     if (first_col <= M_col_range.second && last_col >= M_col_range.first) {
01120       send_procs2.push_back(send_procs[i]);
01121       proc_col_ranges.push_back(first_col);
01122       proc_col_ranges.push_back(last_col);
01123     }
01124   }
01125 
01126   std::vector<int_type> send_rows;
01127   std::vector<int> rows_per_send_proc;
01128   pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc);
01129 
01130   Epetra_Distributor* distor2 = Comm.CreateDistributor();
01131 
01132   int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL;
01133   num_recv_procs = 0;
01134   err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs);
01135 
01136   export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL;
01137   int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL;
01138   len_import_chars = 0;
01139   err = distor2->Do(export_chars, (int)sizeof(int_type), rows_per_send_proc_ptr, len_import_chars, import_chars);
01140 
01141   int_type* recvd_row_ints = reinterpret_cast<int_type*>(import_chars);
01142   int num_recvd_rows = len_import_chars/sizeof(int_type);
01143 
01144   Epetra_Map recvd_rows((int_type) -1, num_recvd_rows, recvd_row_ints, (int_type) column_map.IndexBase64(), Comm);
01145 
01146   delete distor;
01147   delete distor2;
01148   delete [] import_chars;
01149 
01150   Epetra_Map* result_map = NULL;
01151 
01152   err = form_map_union<int_type>(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map);
01153   if (err != 0) {
01154     return(NULL);
01155   }
01156 
01157   return(result_map);
01158 }
01159 
01160 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
01161                                       const Epetra_Map& column_map)
01162 {
01163 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01164   if(M.RowMatrixRowMap().GlobalIndicesInt() && column_map.GlobalIndicesInt()) {
01165     return Tfind_rows_containing_cols<int>(M, column_map);
01166   }
01167   else
01168 #endif
01169 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01170   if(M.RowMatrixRowMap().GlobalIndicesLongLong() && column_map.GlobalIndicesLongLong()) {
01171     return Tfind_rows_containing_cols<long long>(M, column_map);
01172   }
01173   else
01174 #endif
01175     throw std::runtime_error("EpetraExt::find_rows_containing_cols: GlobalIndices type unknown");
01176 }
01177 
01178 //=========================================================================
01179 template<typename int_type>
01180 int MatrixMatrix::TMultiply(const Epetra_CrsMatrix& A,
01181          bool transposeA,
01182          const Epetra_CrsMatrix& B,
01183          bool transposeB,
01184          Epetra_CrsMatrix& C,
01185                            bool call_FillComplete_on_result)
01186 {
01187   // DEBUG
01188   //  bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
01189 #ifdef ENABLE_MMM_TIMINGS
01190   using Teuchos::TimeMonitor;
01191   Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Setup")));
01192 #endif
01193   // end DEBUG
01194 
01195   //
01196   //This method forms the matrix-matrix product C = op(A) * op(B), where
01197   //op(A) == A   if transposeA is false,
01198   //op(A) == A^T if transposeA is true,
01199   //and similarly for op(B).
01200   //
01201 
01202   //A and B should already be Filled.
01203   //(Should we go ahead and call FillComplete() on them if necessary?
01204   // or error out? For now, we choose to error out.)
01205   if (!A.Filled() || !B.Filled()) {
01206     EPETRA_CHK_ERR(-1);
01207   }
01208   
01209   // Is the C matrix new?
01210   bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
01211 
01212   //We're going to refer to the different combinations of op(A) and op(B)
01213   //as scenario 1 through 4.
01214 
01215   int scenario = 1;//A*B
01216   if (transposeB && !transposeA) scenario = 2;//A*B^T
01217   if (transposeA && !transposeB) scenario = 3;//A^T*B
01218   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
01219   if(NewFlag && transposeA && !transposeB) scenario = 5; // A^T*B, newmatrix
01220 
01221   //now check size compatibility
01222   long long Aouter = transposeA ? A.NumGlobalCols64() : A.NumGlobalRows64();
01223   long long Bouter = transposeB ? B.NumGlobalRows64() : B.NumGlobalCols64();
01224   long long Ainner = transposeA ? A.NumGlobalRows64() : A.NumGlobalCols64();
01225   long long Binner = transposeB ? B.NumGlobalCols64() : B.NumGlobalRows64();
01226   if (Ainner != Binner) {
01227     std::cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01228          << "must match for matrix-matrix product. op(A) is "
01229          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<std::endl;
01230     return(-1);
01231   }
01232 
01233   //The result matrix C must at least have a row-map that reflects the
01234   //correct row-size. Don't check the number of columns because rectangular
01235   //matrices which were constructed with only one map can still end up
01236   //having the correct capacity and dimensions when filled.
01237   if (Aouter > C.NumGlobalRows64()) {
01238     std::cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01239          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows64()
01240          << " rows, should have at least "<<Aouter << std::endl;
01241     return(-1);
01242   }
01243 
01244   //It doesn't matter whether C is already Filled or not. If it is already
01245   //Filled, it must have space allocated for the positions that will be
01246   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01247   //we'll error out later when trying to store result values.
01248 
01249   //We're going to need to import remotely-owned sections of A and/or B
01250   //if more than 1 processor is performing this run, depending on the scenario.
01251   int numProcs = A.Comm().NumProc();
01252 
01253   //If we are to use the transpose of A and/or B, we'll need to be able to 
01254   //access, on the local processor, all rows that contain column-indices in
01255   //the domain-map.
01256   const Epetra_Map* domainMap_A = &(A.DomainMap());
01257   const Epetra_Map* domainMap_B = &(B.DomainMap());
01258 
01259   const Epetra_Map* rowmap_A = &(A.RowMap());
01260   const Epetra_Map* rowmap_B = &(B.RowMap());
01261 
01262   //Declare some 'work-space' maps which may be created depending on
01263   //the scenario, and which will be deleted before exiting this function.
01264   const Epetra_Map* workmap1 = NULL;
01265   const Epetra_Map* workmap2 = NULL;
01266   const Epetra_Map* mapunion1 = NULL;
01267 
01268   //Declare a couple of structs that will be used to hold views of the data
01269   //of A and B, to be used for fast access during the matrix-multiplication.
01270   CrsMatrixStruct Aview;
01271   CrsMatrixStruct Atransview;
01272   CrsMatrixStruct Bview;
01273   Teuchos::RCP<Epetra_CrsMatrix> Atrans;
01274   
01275   const Epetra_Map* targetMap_A = rowmap_A;
01276   const Epetra_Map* targetMap_B = rowmap_B;
01277 
01278 #ifdef ENABLE_MMM_TIMINGS
01279   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All I&X")));
01280 #endif
01281   if (numProcs > 1) {
01282     //If op(A) = A^T, find all rows of A that contain column-indices in the
01283     //local portion of the domain-map. (We'll import any remote rows
01284     //that fit this criteria onto the local processor.)
01285     if (scenario == 3 || scenario == 4) {
01286       workmap1 = Tfind_rows_containing_cols<int_type>(A, *domainMap_A);
01287       targetMap_A = workmap1;
01288     }
01289   }
01290   if (scenario == 5) {
01291     targetMap_A = &(A.ColMap());
01292   }
01293   
01294   //Now import any needed remote rows and populate the Aview struct.
01295   if(scenario==1 && call_FillComplete_on_result) {
01296     EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
01297   }
01298   else if (scenario == 5){
01299     // Perform a local transpose of A and store that in Atransview
01300     EpetraExt::RowMatrix_Transpose at(const_cast<Epetra_Map *>(targetMap_A),false);
01301     Epetra_CrsMatrix * Anonconst = const_cast<Epetra_CrsMatrix *>(&A);
01302     Atrans = Teuchos::rcp(at.CreateTransposeLocal(*Anonconst));
01303     import_only<int_type>(*Atrans,*targetMap_A,Atransview);
01304   }
01305   else  {
01306     EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
01307   }
01308 
01309 
01310   // NOTE:  Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
01311   // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.  
01312   
01313 
01314   // Make sure B's views are consistent with A even in serial.
01315   const Epetra_Map* colmap_op_A = NULL;
01316   if(scenario==1 || numProcs > 1){
01317     if (transposeA && scenario == 3) {
01318       colmap_op_A = targetMap_A;
01319     }
01320     else {
01321       colmap_op_A = &(A.ColMap());
01322     }
01323     targetMap_B = colmap_op_A;  
01324   }
01325   if(scenario==5) targetMap_B = &(B.RowMap());
01326 
01327 
01328   if (numProcs > 1) {
01329     //If op(B) = B^T, find all rows of B that contain column-indices in the
01330     //local-portion of the domain-map, or in the column-map of op(A).
01331     //We'll import any remote rows that fit this criteria onto the
01332     //local processor.
01333     if (transposeB) {
01334       EPETRA_CHK_ERR( form_map_union<int_type>(colmap_op_A, domainMap_B, mapunion1) );
01335       workmap2 = Tfind_rows_containing_cols<int_type>(B, *mapunion1);
01336       targetMap_B = workmap2;
01337     }
01338   }
01339 
01340   //Now import any needed remote rows and populate the Bview struct.  
01341   if((scenario==1 && call_FillComplete_on_result) || scenario==5) {
01342     EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
01343   }
01344   else {
01345     EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
01346   }
01347 
01348 #ifdef ENABLE_MMM_TIMINGS
01349   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM All Multiply")));
01350 #endif
01351 
01352   // Zero if filled
01353   if(C.Filled()) C.PutScalar(0.0);
01354 
01355   //Now call the appropriate method to perform the actual multiplication.
01356   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01357 
01358   switch(scenario) {
01359   case 1:    EPETRA_CHK_ERR( mult_A_B(A,Aview,B,Bview,C,call_FillComplete_on_result) );
01360     break;
01361   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
01362     break;
01363   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
01364     break;
01365   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
01366     break;
01367   case 5:    EPETRA_CHK_ERR( mult_AT_B_newmatrix(Atransview, Bview, C) );
01368     break;
01369   }
01370 
01371 
01372   if (scenario != 1 && call_FillComplete_on_result && scenario != 5) {
01373     //We'll call FillComplete on the C matrix before we exit, and give
01374     //it a domain-map and a range-map.
01375     //The domain-map will be the domain-map of B, unless
01376     //op(B)==transpose(B), in which case the range-map of B will be used.
01377     //The range-map will be the range-map of A, unless
01378     //op(A)==transpose(A), in which case the domain-map of A will be used.
01379 
01380     const Epetra_Map* domainmap =
01381       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01382 
01383     const Epetra_Map* rangemap =
01384       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01385 
01386     if (!C.Filled()) {
01387       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01388     }
01389   }
01390 
01391   //Finally, delete the objects that were potentially created
01392   //during the course of importing remote sections of A and B.
01393 
01394   delete mapunion1; mapunion1 = NULL;
01395   delete workmap1; workmap1 = NULL;
01396   delete workmap2; workmap2 = NULL;
01397 
01398   return(0);
01399 }
01400 
01401 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01402          bool transposeA,
01403          const Epetra_CrsMatrix& B,
01404          bool transposeB,
01405          Epetra_CrsMatrix& C,
01406                            bool call_FillComplete_on_result)
01407 {
01408 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01409   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01410   return TMultiply<int>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
01411   }
01412   else
01413 #endif
01414 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01415   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01416   return TMultiply<long long>(A, transposeA, B, transposeB, C, call_FillComplete_on_result);
01417   }
01418   else
01419 #endif
01420     throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
01421 }
01422 
01423 //=========================================================================
01424 template<typename int_type>
01425 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
01426                       bool transposeA,
01427                       double scalarA,
01428                       Epetra_CrsMatrix& B,
01429                       double scalarB )
01430 {
01431   //
01432   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01433 
01434   //A should already be Filled. It doesn't matter whether B is
01435   //already Filled, but if it is, then its graph must already contain
01436   //all nonzero locations that will be referenced in forming the
01437   //sum.
01438 
01439   if (!A.Filled() ) {
01440     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01441     EPETRA_CHK_ERR(-1);
01442   }
01443 
01444   //explicit tranpose A formed as necessary
01445   Epetra_CrsMatrix * Aprime = 0;
01446   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01447   if( transposeA )
01448   {
01449     Atrans = new EpetraExt::RowMatrix_Transpose();
01450     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01451   }
01452   else
01453     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01454 
01455   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01456   int A_NumEntries, B_NumEntries;
01457   int_type * A_Indices = new int_type[MaxNumEntries];
01458   double * A_Values = new double[MaxNumEntries];
01459   int* B_Indices_local;
01460   int_type* B_Indices_global;
01461   double* B_Values;
01462 
01463   int NumMyRows = B.NumMyRows();
01464   int_type Row;
01465   int err;
01466   int ierr = 0;
01467 
01468   if( scalarA )
01469   {
01470     //Loop over B's rows and sum into
01471     for( int i = 0; i < NumMyRows; ++i )
01472     {
01473       Row = (int_type) B.GRID64(i);
01474       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01475 
01476       if (scalarB != 1.0) {
01477         if (!B.Filled()) {
01478           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01479                                                   B_Values, B_Indices_global));
01480         }
01481         else {
01482           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01483                                               B_Values, B_Indices_local));
01484         }
01485 
01486         for(int jj=0; jj<B_NumEntries; ++jj) {
01487           B_Values[jj] = scalarB*B_Values[jj];
01488         }
01489       }
01490 
01491       if( scalarA != 1.0 ) {
01492         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01493       }
01494 
01495       if( B.Filled() ) {//Sum In Values
01496         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01497         assert( err >= 0 );
01498         if (err < 0) ierr = err;
01499       }
01500       else {
01501         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01502         assert( err == 0 || err == 1 || err == 3 );
01503         if (err < 0) ierr = err;
01504       }
01505     }
01506   }
01507   else {
01508     EPETRA_CHK_ERR( B.Scale(scalarB) );
01509   }
01510 
01511   delete [] A_Indices;
01512   delete [] A_Values;
01513 
01514   if( Atrans ) delete Atrans;
01515 
01516   return(ierr);
01517 }
01518 
01519 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01520                       bool transposeA,
01521                       double scalarA,
01522                       Epetra_CrsMatrix& B,
01523                       double scalarB )
01524 {
01525 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01526   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01527   return TAdd<int>(A, transposeA, scalarA, B, scalarB);
01528   }
01529   else
01530 #endif
01531 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01532   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01533   return TAdd<long long>(A, transposeA, scalarA, B, scalarB);
01534   }
01535   else
01536 #endif
01537     throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
01538 }
01539 
01540 template<typename int_type>
01541 int MatrixMatrix::TAdd(const Epetra_CrsMatrix& A,
01542                       bool transposeA,
01543                       double scalarA,
01544                       const Epetra_CrsMatrix & B,
01545                       bool transposeB,
01546                       double scalarB,
01547                       Epetra_CrsMatrix * & C)
01548 {
01549   //
01550   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
01551 
01552   //A and B should already be Filled. C should be an empty pointer.
01553 
01554   if (!A.Filled() || !B.Filled() ) {
01555      std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
01556                << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
01557      EPETRA_CHK_ERR(-1);
01558   }
01559 
01560   Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
01561   EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
01562 
01563   //explicit tranpose A formed as necessary
01564   if( transposeA ) {
01565      Atrans = new EpetraExt::RowMatrix_Transpose();
01566      Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01567   }
01568   else
01569      Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01570 
01571   //explicit tranpose B formed as necessary
01572   if( transposeB ) {
01573      Btrans = new EpetraExt::RowMatrix_Transpose();
01574      Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
01575   }
01576   else
01577      Bprime = const_cast<Epetra_CrsMatrix*>(&B);
01578 
01579   // allocate or zero the new matrix
01580   if(C!=0)
01581      C->PutScalar(0.0);
01582   else
01583      C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
01584 
01585   // build arrays  for easy resuse
01586   int ierr = 0;
01587   Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
01588   double scalar[] = { scalarA, scalarB};
01589 
01590   // do a loop over each matrix to add: A reordering might be more efficient
01591   for(int k=0;k<2;k++) {
01592      int MaxNumEntries = Mat[k]->MaxNumEntries();
01593      int NumEntries;
01594      int_type * Indices = new int_type[MaxNumEntries];
01595      double * Values = new double[MaxNumEntries];
01596    
01597      int NumMyRows = Mat[k]->NumMyRows();
01598      int err;
01599      int_type Row;
01600      int ierr = 0;
01601    
01602      //Loop over rows and sum into C
01603      for( int i = 0; i < NumMyRows; ++i ) {
01604         Row = (int_type) Mat[k]->GRID64(i);
01605         EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
01606    
01607         if( scalar[k] != 1.0 )
01608            for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
01609    
01610         if(C->Filled()) { // Sum in values
01611            err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
01612            if (err < 0) ierr = err;
01613         } else { // just add it to the unfilled CRS Matrix
01614            err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
01615            if (err < 0) ierr = err;
01616         }
01617      }
01618 
01619      delete [] Indices;
01620      delete [] Values;
01621   }
01622 
01623   if( Atrans ) delete Atrans;
01624   if( Btrans ) delete Btrans;
01625 
01626   return(ierr);
01627 }
01628 
01629 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01630                       bool transposeA,
01631                       double scalarA,
01632                       const Epetra_CrsMatrix & B,
01633                       bool transposeB,
01634                       double scalarB,
01635                       Epetra_CrsMatrix * & C)
01636 {
01637 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01638   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01639   return TAdd<int>(A, transposeA, scalarA, B, transposeB, scalarB, C);
01640   }
01641   else
01642 #endif
01643 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01644   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01645   return TAdd<long long>(A, transposeA, scalarA, B, transposeB, scalarB, C);
01646   }
01647   else
01648 #endif
01649     throw std::runtime_error("EpetraExt::MatrixMatrix::Add: GlobalIndices type unknown");
01650 }
01651 
01652 
01653 
01654 //=========================================================================
01655 template<typename int_type>
01656 int MatrixMatrix::TJacobi(double omega,
01657         const Epetra_Vector & Dinv,
01658         const Epetra_CrsMatrix& A,
01659         const Epetra_CrsMatrix& B,
01660         Epetra_CrsMatrix& C,
01661         bool call_FillComplete_on_result)
01662 {
01663 #ifdef ENABLE_MMM_TIMINGS
01664   using Teuchos::TimeMonitor;
01665   Teuchos::RCP<TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Setup")));
01666 #endif
01667 
01668   //A and B should already be Filled.
01669   if (!A.Filled() || !B.Filled()) {
01670     EPETRA_CHK_ERR(-1);
01671   }
01672 
01673   //now check size compatibility
01674   long long Aouter = A.NumGlobalRows64();
01675   long long Bouter = B.NumGlobalCols64();
01676   long long Ainner = A.NumGlobalCols64();
01677   long long Binner = B.NumGlobalRows64();
01678   long long Dlen   = Dinv.GlobalLength64();
01679   if (Ainner != Binner) {
01680     std::cerr << "MatrixMatrix::Jacobi: ERROR, inner dimensions of A and B "
01681          << "must match for matrix-matrix product. A is "
01682          <<Aouter<<"x"<<Ainner << ", B is "<<Binner<<"x"<<Bouter<<std::endl;
01683     return(-1);
01684   }
01685 
01686   //The result matrix C must at least have a row-map that reflects the
01687   //correct row-size. Don't check the number of columns because rectangular
01688   //matrices which were constructed with only one map can still end up
01689   //having the correct capacity and dimensions when filled.
01690   if (Aouter > C.NumGlobalRows64()) {
01691     std::cerr << "MatrixMatrix::Jacobi: ERROR, dimensions of result C must "
01692          << "match dimensions of A * B. C has "<<C.NumGlobalRows64()
01693          << " rows, should have at least "<<Aouter << std::endl;
01694     return(-1);
01695   }
01696 
01697   // Check against the D matrix
01698   if(Dlen != Aouter) {
01699     std::cerr << "MatrixMatrix::Jacboi: ERROR, dimensions of result D must "
01700         << "match dimensions of A's rows. D has "<< Dlen
01701         << " rows, should have " << Aouter << std::endl;
01702     return(-1);
01703   }
01704   
01705   if(!A.RowMap().SameAs(B.RowMap()) || !A.RowMap().SameAs(Dinv.Map())) {
01706     std::cerr << "MatrixMatrix::Jacboi: ERROR, RowMap of A must match RowMap of B "
01707         << "and Map of D."<<std::endl;
01708     return(-1);
01709   }
01710 
01711   //It doesn't matter whether C is already Filled or not. If it is already
01712   //Filled, it must have space allocated for the positions that will be
01713   //referenced in forming C. If it doesn't have enough space,
01714   //we'll error out later when trying to store result values.
01715 
01716   //We're going to need to import remotely-owned sections of A and/or B
01717   //if more than 1 processor is performing this run, depending on the scenario.
01718   int numProcs = A.Comm().NumProc();
01719 
01720   // Maps
01721   const Epetra_Map* rowmap_A = &(A.RowMap());
01722   const Epetra_Map* rowmap_B = &(B.RowMap());
01723 
01724 
01725 
01726   //Declare some 'work-space' maps which may be created depending on
01727   //the scenario, and which will be deleted before exiting this function.
01728   const Epetra_Map* workmap1 = NULL;
01729   const Epetra_Map* workmap2 = NULL;
01730   const Epetra_Map* mapunion1 = NULL;
01731 
01732   //Declare a couple of structs that will be used to hold views of the data
01733   //of A and B, to be used for fast access during the matrix-multiplication.
01734   CrsMatrixStruct Aview;
01735   CrsMatrixStruct Bview;
01736 
01737   const Epetra_Map* targetMap_A = rowmap_A;
01738   const Epetra_Map* targetMap_B = rowmap_B;
01739 
01740 #ifdef ENABLE_MMM_TIMINGS
01741   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All I&X")));
01742 #endif
01743 
01744   //Now import any needed remote rows and populate the Aview struct.
01745   if(call_FillComplete_on_result) {
01746     EPETRA_CHK_ERR(import_only<int_type>(A,*targetMap_A,Aview));
01747   }
01748   else  {
01749     EPETRA_CHK_ERR( import_and_extract_views<int_type>(A, *targetMap_A, Aview));
01750   }
01751 
01752   // NOTE:  Next up is to switch to import_only for B as well, and then modify the THREE SerialCores
01753   // to add a Acol2Brow and Acol2Bimportrow array for in-algorithm lookups.  
01754   
01755   // Make sure B's views are consistent with A even in serial.
01756   const Epetra_Map* colmap_op_A = NULL;
01757   if(numProcs > 1){
01758     colmap_op_A = &(A.ColMap());
01759     targetMap_B = colmap_op_A;
01760   }
01761 
01762   //Now import any needed remote rows and populate the Bview struct.  
01763   if(call_FillComplete_on_result) {
01764     EPETRA_CHK_ERR(import_only<int_type>(B,*targetMap_B,Bview,A.Importer()));
01765   }
01766   else {
01767     EPETRA_CHK_ERR( import_and_extract_views<int_type>(B, *targetMap_B, Bview) );
01768   }
01769 
01770 #ifdef ENABLE_MMM_TIMINGS
01771   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi All Multiply")));
01772 #endif
01773 
01774   // Zero if filled
01775   if(C.Filled()) C.PutScalar(0.0);
01776 
01777   //Now call the appropriate method to perform the actual multiplication.
01778   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01779   EPETRA_CHK_ERR( jacobi_A_B(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result) );
01780 
01781   //Finally, delete the objects that were potentially created
01782   //during the course of importing remote sections of A and B.
01783   delete mapunion1; mapunion1 = NULL;
01784   delete workmap1; workmap1 = NULL;
01785   delete workmap2; workmap2 = NULL;
01786 
01787   return(0);
01788 }
01789 
01790 
01791 
01792 int MatrixMatrix::Jacobi(double omega,
01793        const Epetra_Vector & Dinv,
01794        const Epetra_CrsMatrix& A,
01795        const Epetra_CrsMatrix& B,
01796        Epetra_CrsMatrix& C,
01797        bool call_FillComplete_on_result)
01798 {
01799 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01800   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01801     return TJacobi<int>(omega, Dinv, A, B, C, call_FillComplete_on_result);
01802   }
01803   else
01804 #endif
01805 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01806   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01807     return TJacobi<long long>(omega, Dinv, A, B, C, call_FillComplete_on_result);
01808   }
01809   else
01810 #endif
01811     throw std::runtime_error("EpetraExt::MatrixMatrix::Jacobi: GlobalIndices type unknown");
01812 }
01813 
01814 
01815 
01816 
01817 
01818 
01819 
01820 
01821 
01822 
01823 } // namespace EpetraExt
01824 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines