EpetraExt Package Browser (Single Doxygen Collection) Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 // 
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2001) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 
00029 #include <EpetraExt_ConfigDefs.h>
00030 #include <EpetraExt_MatrixMatrix.h>
00031 
00032 #include <EpetraExt_MMHelpers.h>
00033 
00034 #include <EpetraExt_Transpose_RowMatrix.h>
00035 
00036 #include <Epetra_Export.h>
00037 #include <Epetra_Import.h>
00038 #include <Epetra_Util.h>
00039 #include <Epetra_Map.h>
00040 #include <Epetra_Comm.h>
00041 #include <Epetra_CrsMatrix.h>
00042 #include <Epetra_Directory.h>
00043 #include <Epetra_HashTable.h>
00044 #include <Epetra_Distributor.h>
00045 
00046 #ifdef HAVE_VECTOR
00047 #include <vector>
00048 #endif
00049 
00050 namespace EpetraExt {
00051 
00052 //
00053 //Method for internal use... sparsedot forms a dot-product between two
00054 //sparsely-populated 'vectors'.
00055 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00056 //
00057 double sparsedot(double* u, int* u_ind, int u_len,
00058      double* v, int* v_ind, int v_len)
00059 {
00060   double result = 0.0;
00061 
00062   int v_idx = 0;
00063   int u_idx = 0;
00064 
00065   while(v_idx < v_len && u_idx < u_len) {
00066     int ui = u_ind[u_idx];
00067     int vi = v_ind[v_idx];
00068 
00069     if (ui < vi) {
00070       ++u_idx;
00071     }
00072     else if (ui > vi) {
00073       ++v_idx;
00074     }
00075     else {
00076       result += u[u_idx++]*v[v_idx++];
00077     }
00078   }
00079 
00080   return(result);
00081 }
00082 
00083 //kernel method for computing the local portion of C = A*B
00084 int mult_A_B(CrsMatrixStruct& Aview,
00085        CrsMatrixStruct& Bview,
00086        CrsWrapper& C)
00087 {
00088   int C_firstCol = Bview.colMap->MinLID();
00089   int C_lastCol = Bview.colMap->MaxLID();
00090 
00091   int C_firstCol_import = 0;
00092   int C_lastCol_import = -1;
00093 
00094   int* bcols = Bview.colMap->MyGlobalElements();
00095   int* bcols_import = NULL;
00096   if (Bview.importColMap != NULL) {
00097     C_firstCol_import = Bview.importColMap->MinLID();
00098     C_lastCol_import = Bview.importColMap->MaxLID();
00099 
00100     bcols_import = Bview.importColMap->MyGlobalElements();
00101   }
00102 
00103   int C_numCols = C_lastCol - C_firstCol + 1;
00104   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00105 
00106   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00107   double* dwork = new double[C_numCols];
00108   int* iwork = new int[C_numCols];
00109 
00110   double* C_row_i = dwork;
00111   int* C_cols = iwork;
00112 
00113   int C_row_i_length, i, j, k;
00114 
00115   //To form C = A*B we're going to execute this expression:
00116   //
00117   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00118   //
00119   //Our goal, of course, is to navigate the data in A and B once, without
00120   //performing searches for column-indices, etc.
00121 
00122   bool C_filled = C.Filled();
00123 
00124   //loop over the rows of A.
00125   for(i=0; i<Aview.numRows; ++i) {
00126 
00127     //only navigate the local portion of Aview... (It's probable that we
00128     //imported more of A than we need for A*B, because other cases like A^T*B 
00129     //need the extra rows.)
00130     if (Aview.remote[i]) {
00131       continue;
00132     }
00133 
00134     int* Aindices_i = Aview.indices[i];
00135     double* Aval_i  = Aview.values[i];
00136 
00137     int global_row = Aview.rowMap->GID(i);
00138 
00139     //loop across the i-th row of A and for each corresponding row
00140     //in B, loop across colums and accumulate product
00141     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00142     //as we stride across B(k,:) we're calculating updates for row i of the
00143     //result matrix C.
00144 
00145     for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00146       int Ak = Bview.rowMap->LID(Aview.colMap->GID(Aindices_i[k]));
00147       double Aval = Aval_i[k];
00148 
00149       int* Bcol_inds = Bview.indices[Ak];
00150       double* Bvals_k = Bview.values[Ak];
00151 
00152       C_row_i_length = 0;
00153 
00154       if (Bview.remote[Ak]) {
00155   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00156     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00157           C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]];
00158   }
00159       }
00160       else {
00161   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00162     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00163           C_cols[C_row_i_length++] = bcols[Bcol_inds[j]];
00164   }
00165       }
00166 
00167       //
00168       //Now put the C_row_i values into C.
00169       //
00170 
00171       int err = C_filled ?
00172           C.SumIntoGlobalValues(global_row, C_row_i_length, C_row_i, C_cols)
00173           :
00174           C.InsertGlobalValues(global_row, C_row_i_length, C_row_i, C_cols);
00175  
00176       if (err < 0) {
00177         return(err);
00178       }
00179       if (err > 0) {
00180         if (C_filled) {
00181           //C is Filled, and doesn't
00182           //have all the necessary nonzero locations.
00183           return(err);
00184         }
00185       }
00186     }
00187   }
00188 
00189   delete [] dwork;
00190   delete [] iwork;
00191 
00192   return(0);
00193 }
00194 
00195 //kernel method for computing the local portion of C = A*B^T
00196 int mult_A_Btrans(CrsMatrixStruct& Aview,
00197       CrsMatrixStruct& Bview,
00198       CrsWrapper& C)
00199 {
00200   int i, j, k;
00201   int returnValue = 0;
00202 
00203   int maxlen = 0;
00204   for(i=0; i<Aview.numRows; ++i) {
00205     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00206   }
00207   for(i=0; i<Bview.numRows; ++i) {
00208     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00209   }
00210 
00211   //cout << "Aview: " << endl;
00212   //dumpCrsMatrixStruct(Aview);
00213 
00214   //cout << "Bview: " << endl;
00215   //dumpCrsMatrixStruct(Bview);
00216 
00217   int numBcols = Bview.colMap->NumMyElements();
00218   int numBrows = Bview.numRows;
00219 
00220   int iworklen = maxlen*2 + numBcols;
00221   int* iwork = new int[iworklen];
00222 
00223   int* bcols = iwork+maxlen*2;
00224   int* bgids = Bview.colMap->MyGlobalElements();
00225   double* bvals = new double[maxlen*2];
00226   double* avals = bvals+maxlen;
00227 
00228   int max_all_b = Bview.colMap->MaxAllGID();
00229   int min_all_b = Bview.colMap->MinAllGID();
00230 
00231   //bcols will hold the GIDs from B's column-map for fast access
00232   //during the computations below
00233   for(i=0; i<numBcols; ++i) {
00234     int blid = Bview.colMap->LID(bgids[i]);
00235     bcols[blid] = bgids[i];
00236   }
00237 
00238   //next create arrays indicating the first and last column-index in
00239   //each row of B, so that we can know when to skip certain rows below.
00240   //This will provide a large performance gain for banded matrices, and
00241   //a somewhat smaller gain for *most* other matrices.
00242   int* b_firstcol = new int[2*numBrows];
00243   int* b_lastcol = b_firstcol+numBrows;
00244   int temp;
00245   for(i=0; i<numBrows; ++i) {
00246     b_firstcol[i] = max_all_b;
00247     b_lastcol[i] = min_all_b;
00248 
00249     int Blen_i = Bview.numEntriesPerRow[i];
00250     if (Blen_i < 1) continue;
00251     int* Bindices_i = Bview.indices[i];
00252 
00253     if (Bview.remote[i]) {
00254       for(k=0; k<Blen_i; ++k) {
00255         temp = Bview.importColMap->GID(Bindices_i[k]);
00256         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00257         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00258       }
00259     }
00260     else {
00261       for(k=0; k<Blen_i; ++k) {
00262         temp = bcols[Bindices_i[k]];
00263         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00264         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00265       }
00266     }
00267   }
00268 
00269   Epetra_Util util;
00270 
00271   int* Aind = iwork;
00272   int* Bind = iwork+maxlen;
00273 
00274   bool C_filled = C.Filled();
00275 
00276   //To form C = A*B^T, we're going to execute this expression:
00277   //
00278   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00279   //
00280   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00281   //But it requires the use of a 'sparsedot' function (we're simply forming
00282   //dot-products with row A_i and row B_j for all i and j).
00283 
00284   //loop over the rows of A.
00285   for(i=0; i<Aview.numRows; ++i) {
00286     if (Aview.remote[i]) {
00287       continue;
00288     }
00289 
00290     int* Aindices_i = Aview.indices[i];
00291     double* Aval_i  = Aview.values[i];
00292     int A_len_i = Aview.numEntriesPerRow[i];
00293     if (A_len_i < 1) {
00294       continue;
00295     }
00296 
00297     for(k=0; k<A_len_i; ++k) {
00298       Aind[k] = Aview.colMap->GID(Aindices_i[k]);
00299       avals[k] = Aval_i[k];
00300     }
00301 
00302     util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL);
00303 
00304     int mina = Aind[0];
00305     int maxa = Aind[A_len_i-1];
00306 
00307     if (mina > max_all_b || maxa < min_all_b) {
00308       continue;
00309     }
00310 
00311     int global_row = Aview.rowMap->GID(i);
00312 
00313     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00314     for(j=0; j<Bview.numRows; ++j) {
00315       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00316         continue;
00317       }
00318 
00319       int* Bindices_j = Bview.indices[j];
00320       int B_len_j = Bview.numEntriesPerRow[j];
00321       if (B_len_j < 1) {
00322         continue;
00323       }
00324 
00325       int tmp, Blen = 0;
00326 
00327       if (Bview.remote[j]) {
00328         for(k=0; k<B_len_j; ++k) {
00329     tmp = Bview.importColMap->GID(Bindices_j[k]);
00330           if (tmp < mina || tmp > maxa) {
00331             continue;
00332           }
00333 
00334           bvals[Blen] = Bview.values[j][k];
00335           Bind[Blen++] = tmp;
00336   }
00337       }
00338       else {
00339         for(k=0; k<B_len_j; ++k) {
00340     tmp = bcols[Bindices_j[k]];
00341           if (tmp < mina || tmp > maxa) {
00342             continue;
00343           }
00344 
00345           bvals[Blen] = Bview.values[j][k];
00346           Bind[Blen++] = tmp;
00347   }
00348       }
00349 
00350       if (Blen < 1) {
00351         continue;
00352       }
00353 
00354       util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL);
00355 
00356       double C_ij = sparsedot(avals, Aind, A_len_i,
00357             bvals, Bind, Blen);
00358 
00359       if (C_ij == 0.0) {
00360   continue;
00361       }
00362       int global_col = Bview.rowMap->GID(j);
00363 
00364       int err = C_filled ?
00365         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00366         :
00367         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00368 
00369       if (err < 0) {
00370   return(err);
00371       }
00372       if (err > 0) {
00373   if (C_filled) {
00374     //C.Filled()==true, and C doesn't have all the necessary nonzero
00375           //locations, or global_row or global_col is out of range (less
00376           //than 0 or non local).
00377           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00378               << "to insert value in result matrix at position "<<global_row
00379              <<"," <<global_col<<", possibly because result matrix has a "
00380              << "column-map that doesn't include column "<<global_col
00381              <<" on this proc." <<std::endl;
00382     return(err);
00383   }
00384       }
00385     }
00386   }
00387 
00388   delete [] iwork;
00389   delete [] bvals;
00390   delete [] b_firstcol;
00391 
00392   return(returnValue);
00393 }
00394 
00395 //kernel method for computing the local portion of C = A^T*B
00396 int mult_Atrans_B(CrsMatrixStruct& Aview,
00397       CrsMatrixStruct& Bview,
00398       CrsWrapper& C)
00399 {
00400   int C_firstCol = Bview.colMap->MinLID();
00401   int C_lastCol = Bview.colMap->MaxLID();
00402 
00403   int C_firstCol_import = 0;
00404   int C_lastCol_import = -1;
00405 
00406   if (Bview.importColMap != NULL) {
00407     C_firstCol_import = Bview.importColMap->MinLID();
00408     C_lastCol_import = Bview.importColMap->MaxLID();
00409   }
00410 
00411   int C_numCols = C_lastCol - C_firstCol + 1;
00412   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00413 
00414   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00415 
00416   double* C_row_i = new double[C_numCols];
00417   int* C_colInds = new int[C_numCols];
00418 
00419   int i, j, k;
00420 
00421   for(j=0; j<C_numCols; ++j) {
00422     C_row_i[j] = 0.0;
00423     C_colInds[j] = 0;
00424   }
00425 
00426   //To form C = A^T*B, compute a series of outer-product updates.
00427   //
00428   // for (ith column of A^T) { 
00429   //   C_i = outer product of A^T(:,i) and B(i,:)
00430   // Where C_i is the ith matrix update,
00431   //       A^T(:,i) is the ith column of A^T, and
00432   //       B(i,:) is the ith row of B.
00433   //
00434 
00435   //dumpCrsMatrixStruct(Aview);
00436   //dumpCrsMatrixStruct(Bview);
00437   int localProc = Bview.colMap->Comm().MyPID();
00438 
00439   int* Arows = Aview.rowMap->MyGlobalElements();
00440 
00441   bool C_filled = C.Filled();
00442 
00443   //loop over the rows of A (which are the columns of A^T).
00444   for(i=0; i<Aview.numRows; ++i) {
00445 
00446     int* Aindices_i = Aview.indices[i];
00447     double* Aval_i  = Aview.values[i];
00448 
00449     //we'll need to get the row of B corresponding to Arows[i],
00450     //where Arows[i] is the GID of A's ith row.
00451     int Bi = Bview.rowMap->LID(Arows[i]);
00452     if (Bi<0) {
00453       cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00454      <<Arows[i]<<" of matrix B, but doesn't have it."<<endl;
00455       return(-1);
00456     }
00457 
00458     int* Bcol_inds = Bview.indices[Bi];
00459     double* Bvals_i = Bview.values[Bi];
00460 
00461     //for each column-index Aj in the i-th row of A, we'll update
00462     //global-row GID(Aj) of the result matrix C. In that row of C,
00463     //we'll update column-indices given by the column-indices in the
00464     //ith row of B that we're now holding (Bcol_inds).
00465 
00466     //First create a list of GIDs for the column-indices
00467     //that we'll be updating.
00468 
00469     int Blen = Bview.numEntriesPerRow[Bi];
00470     if (Bview.remote[Bi]) {
00471       for(j=0; j<Blen; ++j) {
00472         C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]);
00473       }
00474     }
00475     else {
00476       for(j=0; j<Blen; ++j) {
00477         C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]);
00478       }
00479     }
00480 
00481     //loop across the i-th row of A (column of A^T)
00482     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00483 
00484       int Aj = Aindices_i[j];
00485       double Aval = Aval_i[j];
00486 
00487       int global_row;
00488       if (Aview.remote[i]) {
00489   global_row = Aview.importColMap->GID(Aj);
00490       }
00491       else {
00492   global_row = Aview.colMap->GID(Aj);
00493       }
00494 
00495       if (!C.RowMap().MyGID(global_row)) {
00496         continue;
00497       }
00498 
00499       for(k=0; k<Blen; ++k) {
00500         C_row_i[k] = Aval*Bvals_i[k];
00501       }
00502 
00503       //
00504       //Now add this row-update to C.
00505       //
00506 
00507       int err = C_filled ?
00508         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00509         :
00510         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00511 
00512       if (err < 0) {
00513         return(err);
00514       }
00515       if (err > 0) {
00516         if (C_filled) {
00517           //C is Filled, and doesn't have all the necessary nonzero locations.
00518           return(err);
00519         }
00520       }
00521     }
00522   }
00523 
00524   delete [] C_row_i;
00525   delete [] C_colInds;
00526 
00527   return(0);
00528 }
00529 
00530 //kernel method for computing the local portion of C = A^T*B^T
00531 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00532            CrsMatrixStruct& Bview,
00533            CrsWrapper& C)
00534 {
00535   int C_firstCol = Aview.rowMap->MinLID();
00536   int C_lastCol = Aview.rowMap->MaxLID();
00537 
00538   int C_firstCol_import = 0;
00539   int C_lastCol_import = -1;
00540 
00541   if (Aview.importColMap != NULL) {
00542     C_firstCol_import = Aview.importColMap->MinLID();
00543     C_lastCol_import = Aview.importColMap->MaxLID();
00544   }
00545 
00546   int C_numCols = C_lastCol - C_firstCol + 1;
00547   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00548 
00549   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00550 
00551   double* dwork = new double[C_numCols];
00552   int* iwork = new int[C_numCols];
00553 
00554   double* C_col_j = dwork;
00555   int* C_inds = iwork;
00556 
00557   //cout << "Aview: " << endl;
00558   //dumpCrsMatrixStruct(Aview);
00559 
00560   //cout << "Bview: " << endl;
00561   //dumpCrsMatrixStruct(Bview);
00562 
00563 
00564   int i, j, k;
00565 
00566   for(j=0; j<C_numCols; ++j) {
00567     C_col_j[j] = 0.0;
00568     C_inds[j] = -1;
00569   }
00570 
00571   int* A_col_inds = Aview.colMap->MyGlobalElements();
00572   int* A_col_inds_import = Aview.importColMap ?
00573     Aview.importColMap->MyGlobalElements() : 0;
00574 
00575   const Epetra_Map* Crowmap = &(C.RowMap());
00576 
00577   //To form C = A^T*B^T, we're going to execute this expression:
00578   //
00579   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00580   //
00581   //Our goal, of course, is to navigate the data in A and B once, without
00582   //performing searches for column-indices, etc. In other words, we avoid
00583   //column-wise operations like the plague...
00584 
00585   int* Brows = Bview.rowMap->MyGlobalElements();
00586 
00587   //loop over the rows of B
00588   for(j=0; j<Bview.numRows; ++j) {
00589     int* Bindices_j = Bview.indices[j];
00590     double* Bvals_j = Bview.values[j];
00591 
00592     int global_col = Brows[j];
00593 
00594     //loop across columns in the j-th row of B and for each corresponding
00595     //row in A, loop across columns and accumulate product
00596     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00597     //words, as we stride across B(j,:), we use selected rows in A to
00598     //calculate updates for column j of the result matrix C.
00599 
00600     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00601 
00602       int bk = Bindices_j[k];
00603       double Bval = Bvals_j[k];
00604 
00605       int global_k;
00606       if (Bview.remote[j]) {
00607   global_k = Bview.importColMap->GID(bk);
00608       }
00609       else {
00610   global_k = Bview.colMap->GID(bk);
00611       }
00612 
00613       //get the corresponding row in A
00614       int ak = Aview.rowMap->LID(global_k);
00615       if (ak<0) {
00616   continue;
00617       }
00618 
00619       int* Aindices_k = Aview.indices[ak];
00620       double* Avals_k = Aview.values[ak];
00621 
00622       int C_len = 0;
00623 
00624       if (Aview.remote[ak]) {
00625   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00626     C_col_j[C_len] = Avals_k[i]*Bval;
00627           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00628   }
00629       }
00630       else {
00631   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00632     C_col_j[C_len] = Avals_k[i]*Bval;
00633           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00634   }
00635       }
00636 
00637       //Now loop across the C_col_j values and put non-zeros into C.
00638 
00639       for(i=0; i < C_len ; ++i) {
00640   if (C_col_j[i] == 0.0) continue;
00641 
00642   int global_row = C_inds[i];
00643   if (!Crowmap->MyGID(global_row)) {
00644     continue;
00645   }
00646 
00647   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00648 
00649   if (err < 0) {
00650     return(err);
00651   }
00652   else {
00653           if (err > 0) {
00654       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00655       if (err < 0) {
00656               return(err);
00657             }
00658     }
00659   }
00660       }
00661 
00662     }
00663   }
00664 
00665   delete [] dwork;
00666   delete [] iwork;
00667 
00668   return(0);
00669 }
00670 
00671 int import_and_extract_views(const Epetra_CrsMatrix& M,
00672            const Epetra_Map& targetMap,
00673            CrsMatrixStruct& Mview)
00674 {
00675   //The goal of this method is to populate the 'Mview' struct with views of the
00676   //rows of M, including all rows that correspond to elements in 'targetMap'.
00677   //
00678   //If targetMap includes local elements that correspond to remotely-owned rows
00679   //of M, then those remotely-owned rows will be imported into
00680   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00681 
00682   Mview.deleteContents();
00683 
00684   const Epetra_Map& Mrowmap = M.RowMap();
00685 
00686   int numProcs = Mrowmap.Comm().NumProc();
00687 
00688   Mview.numRows = targetMap.NumMyElements();
00689 
00690   int* Mrows = targetMap.MyGlobalElements();
00691 
00692   if (Mview.numRows > 0) {
00693     Mview.numEntriesPerRow = new int[Mview.numRows];
00694     Mview.indices = new int*[Mview.numRows];
00695     Mview.values = new double*[Mview.numRows];
00696     Mview.remote = new bool[Mview.numRows];
00697   }
00698 
00699   Mview.numRemote = 0;
00700 
00701   int i;
00702   for(i=0; i<Mview.numRows; ++i) {
00703     int mlid = Mrowmap.LID(Mrows[i]);
00704     if (mlid < 0) {
00705       Mview.remote[i] = true;
00706       ++Mview.numRemote;
00707     }
00708     else {
00709       EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i],
00710            Mview.values[i], Mview.indices[i]) );
00711       Mview.remote[i] = false;
00712     }
00713   }
00714 
00715   Mview.origRowMap = &(M.RowMap());
00716   Mview.rowMap = &targetMap;
00717   Mview.colMap = &(M.ColMap());
00718   Mview.domainMap = &(M.DomainMap());
00719   Mview.importColMap = NULL;
00720 
00721   if (numProcs < 2) {
00722     if (Mview.numRemote > 0) {
00723       cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00724      << "attempting to import remote matrix rows."<<endl;
00725       return(-1);
00726     }
00727 
00728     //If only one processor we don't need to import any remote rows, so return.
00729     return(0);
00730   }
00731 
00732   //
00733   //Now we will import the needed remote rows of M, if the global maximum
00734   //value of numRemote is greater than 0.
00735   //
00736 
00737   int globalMaxNumRemote = 0;
00738   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00739 
00740   if (globalMaxNumRemote > 0) {
00741     //Create a map that describes the remote rows of M that we need.
00742 
00743     int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL;
00744     int offset = 0;
00745     for(i=0; i<Mview.numRows; ++i) {
00746       if (Mview.remote[i]) {
00747   MremoteRows[offset++] = Mrows[i];
00748       }
00749     }
00750 
00751     Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows,
00752            Mrowmap.IndexBase(), Mrowmap.Comm());
00753 
00754     //Create an importer with target-map MremoteRowMap and 
00755     //source-map Mrowmap.
00756     Epetra_Import importer(MremoteRowMap, Mrowmap);
00757 
00758     //Now create a new matrix into which we can import the remote rows of M
00759     //that we need.
00760     Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1);
00761 
00762     EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) );
00763 
00764     EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) );
00765 
00766     //Finally, use the freshly imported data to fill in the gaps in our views
00767     //of rows of M.
00768     for(i=0; i<Mview.numRows; ++i) {
00769       if (Mview.remote[i]) {
00770   int importLID = MremoteRowMap.LID(Mrows[i]);
00771   EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID,
00772               Mview.numEntriesPerRow[i],
00773               Mview.values[i],
00774               Mview.indices[i]) );
00775       }
00776     }
00777 
00778     Mview.importColMap = &(Mview.importMatrix->ColMap());
00779 
00780     delete [] MremoteRows;
00781   }
00782 
00783   return(0);
00784 }
00785 
00786 int distribute_list(const Epetra_Comm& Comm,
00787                     int lenSendList,
00788                     const int* sendList,
00789                     int& maxSendLen,
00790                     int*& recvList)
00791 {
00792   maxSendLen = 0; 
00793   Comm.MaxAll(&lenSendList, &maxSendLen, 1);
00794   int numProcs = Comm.NumProc();
00795   recvList = new int[numProcs*maxSendLen];
00796   int* send = new int[maxSendLen];
00797   for(int i=0; i<lenSendList; ++i) {
00798     send[i] = sendList[i];
00799   }
00800 
00801   Comm.GatherAll(send, recvList, maxSendLen);
00802   delete [] send;
00803 
00804   return(0);
00805 }
00806 
00807 Epetra_Map* create_map_from_imported_rows(const Epetra_Map* map,
00808             int totalNumSend,
00809             int* sendRows,
00810             int numProcs,
00811             int* numSendPerProc)
00812 {
00813   //Perform sparse all-to-all communication to send the row-GIDs
00814   //in sendRows to appropriate processors according to offset
00815   //information in numSendPerProc.
00816   //Then create and return a map containing the rows that we
00817   //received on the local processor.
00818 
00819   Epetra_Distributor* distributor = map->Comm().CreateDistributor();
00820 
00821   int* sendPIDs = totalNumSend>0 ? new int[totalNumSend] : NULL;
00822   int offset = 0;
00823   for(int i=0; i<numProcs; ++i) {
00824     for(int j=0; j<numSendPerProc[i]; ++j) {
00825       sendPIDs[offset++] = i;
00826     }
00827   }
00828 
00829   int numRecv = 0;
00830   int err = distributor->CreateFromSends(totalNumSend, sendPIDs,
00831            true, numRecv);
00832   assert( err == 0 );
00833 
00834   char* c_recv_objs = numRecv>0 ? new char[numRecv*sizeof(int)] : NULL;
00835   int num_c_recv = numRecv*(int)sizeof(int);
00836 
00837   err = distributor->Do(reinterpret_cast<char*>(sendRows),
00838       (int)sizeof(int), num_c_recv, c_recv_objs);
00839   assert( err == 0 );
00840 
00841   int* recvRows = reinterpret_cast<int*>(c_recv_objs);
00842 
00843   //Now create a map with the rows we've received from other processors.
00844   Epetra_Map* import_rows = new Epetra_Map(-1, numRecv, recvRows,
00845              map->IndexBase(), map->Comm());
00846 
00847   delete [] c_recv_objs;
00848   delete [] sendPIDs;
00849 
00850   delete distributor;
00851 
00852   return( import_rows );
00853 }
00854 
00855 int form_map_union(const Epetra_Map* map1,
00856        const Epetra_Map* map2,
00857        const Epetra_Map*& mapunion)
00858 {
00859   //form the union of two maps
00860 
00861   if (map1 == NULL) {
00862     mapunion = new Epetra_Map(*map2);
00863     return(0);
00864   }
00865 
00866   if (map2 == NULL) {
00867     mapunion = new Epetra_Map(*map1);
00868     return(0);
00869   }
00870 
00871   int map1_len       = map1->NumMyElements();
00872   int* map1_elements = map1->MyGlobalElements();
00873   int map2_len       = map2->NumMyElements();
00874   int* map2_elements = map2->MyGlobalElements();
00875 
00876   int* union_elements = new int[map1_len+map2_len];
00877 
00878   int map1_offset = 0, map2_offset = 0, union_offset = 0;
00879 
00880   while(map1_offset < map1_len && map2_offset < map2_len) {
00881     int map1_elem = map1_elements[map1_offset];
00882     int map2_elem = map2_elements[map2_offset];
00883 
00884     if (map1_elem < map2_elem) {
00885       union_elements[union_offset++] = map1_elem;
00886       ++map1_offset;
00887     }
00888     else if (map1_elem > map2_elem) {
00889       union_elements[union_offset++] = map2_elem;
00890       ++map2_offset;
00891     }
00892     else {
00893       union_elements[union_offset++] = map1_elem;
00894       ++map1_offset;
00895       ++map2_offset;
00896     }
00897   }
00898 
00899   int i;
00900   for(i=map1_offset; i<map1_len; ++i) {
00901     union_elements[union_offset++] = map1_elements[i];
00902   }
00903 
00904   for(i=map2_offset; i<map2_len; ++i) {
00905     union_elements[union_offset++] = map2_elements[i];
00906   }
00907 
00908   mapunion = new Epetra_Map(-1, union_offset, union_elements,
00909           map1->IndexBase(), map1->Comm());
00910 
00911   delete [] union_elements;
00912 
00913   return(0);
00914 }
00915 
00916 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
00917                                       const Epetra_Map* colmap)
00918 {
00919   //The goal of this function is to find all rows in the matrix M that contain
00920   //column-indices which are in 'colmap'. A map containing those rows is
00921   //returned.
00922 
00923   int numProcs = colmap->Comm().NumProc();
00924   int localProc = colmap->Comm().MyPID();
00925 
00926   if (numProcs < 2) {
00927     Epetra_Map* result_map = NULL;
00928 
00929     int err = form_map_union(&(M.RowMap()), NULL,
00930                              (const Epetra_Map*&)result_map);
00931     if (err != 0) {
00932       return(NULL);
00933     }
00934     return(result_map);
00935   }
00936 
00937   int MnumRows = M.NumMyRows();
00938   int numCols = colmap->NumMyElements();
00939 
00940   int* iwork = new int[numCols+2*numProcs+numProcs*MnumRows];
00941   int iworkOffset = 0;
00942 
00943   int* cols = &(iwork[iworkOffset]); iworkOffset += numCols;
00944 
00945   cols[0] = numCols;
00946   colmap->MyGlobalElements( &(cols[1]) );
00947 
00948   //cols are not necessarily sorted at this point, so we'll make sure
00949   //they are sorted.
00950   Epetra_Util util;
00951   util.Sort(true, numCols, &(cols[1]), 0, NULL, 0, NULL);
00952 
00953   int* all_proc_cols = NULL;
00954   
00955   int max_num_cols;
00956   distribute_list(colmap->Comm(), numCols+1, cols, max_num_cols, all_proc_cols);
00957 
00958   const Epetra_CrsGraph& Mgraph = M.Graph();
00959   const Epetra_Map& Mrowmap = M.RowMap();
00960   const Epetra_Map& Mcolmap = M.ColMap();
00961   int MminMyLID = Mrowmap.MinLID();
00962 
00963   int* procNumCols = &(iwork[iworkOffset]); iworkOffset += numProcs;
00964   int* procNumRows = &(iwork[iworkOffset]); iworkOffset += numProcs;
00965   int* procRows_1D = &(iwork[iworkOffset]);
00966   int** procCols = new int*[numProcs];
00967   int** procRows = new int*[numProcs];
00968   int i, err;
00969   int offset = 0;
00970   for(i=0; i<numProcs; ++i) {
00971     procNumCols[i] = all_proc_cols[offset];
00972     procCols[i] = &(all_proc_cols[offset+1]);
00973     offset += max_num_cols;
00974 
00975     procNumRows[i] = 0;
00976     procRows[i] = &(procRows_1D[i*MnumRows]);
00977   }
00978 
00979   int* Mindices;
00980 
00981   for(int row=0; row<MnumRows; ++row) {
00982     int localRow = MminMyLID+row;
00983     int globalRow = Mrowmap.GID(localRow);
00984     int MnumCols;
00985     err = Mgraph.ExtractMyRowView(localRow, MnumCols, Mindices);
00986     if (err != 0) {
00987       cerr << "proc "<<localProc<<", error in Mgraph.ExtractMyRowView, row "
00988            <<localRow<<endl;
00989       return(NULL);
00990     }
00991 
00992     for(int j=0; j<MnumCols; ++j) {
00993       int colGID = Mcolmap.GID(Mindices[j]);
00994 
00995       for(int p=0; p<numProcs; ++p) {
00996         if (p==localProc) continue;
00997 
00998         int insertPoint;
00999         int foundOffset = Epetra_Util_binary_search(colGID, procCols[p],
01000                                                     procNumCols[p], insertPoint);
01001         if (foundOffset > -1) {
01002           int numRowsP = procNumRows[p];
01003           int* prows = procRows[p];
01004           if (numRowsP < 1 || prows[numRowsP-1] < globalRow) {
01005             prows[numRowsP] = globalRow;
01006             procNumRows[p]++;
01007           }
01008         }
01009       }
01010     }
01011   }
01012 
01013   //Now make the contents of procRows occupy a contiguous section
01014   //of procRows_1D.
01015   offset = procNumRows[0];
01016   for(i=1; i<numProcs; ++i) {
01017     for(int j=0; j<procNumRows[i]; ++j) {
01018       procRows_1D[offset++] = procRows[i][j];
01019     }
01020   }
01021 
01022   int totalNumSend = offset;
01023   //Next we will do a sparse all-to-all communication to send the lists of rows
01024   //to the appropriate processors, and create a map with the rows we've received
01025   //from other processors.
01026   Epetra_Map* recvd_rows =
01027     create_map_from_imported_rows(&Mrowmap, totalNumSend,
01028                                   procRows_1D, numProcs, procNumRows);
01029 
01030   Epetra_Map* result_map = NULL;
01031 
01032   err = form_map_union(&(M.RowMap()), recvd_rows, (const Epetra_Map*&)result_map);
01033   if (err != 0) {
01034     return(NULL);
01035   }
01036 
01037   delete [] iwork;
01038   delete [] procCols;
01039   delete [] procRows;
01040   delete [] all_proc_cols;
01041   delete recvd_rows;
01042 
01043   return(result_map);
01044 }
01045 
01046 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01047          bool transposeA,
01048          const Epetra_CrsMatrix& B,
01049          bool transposeB,
01050          Epetra_CrsMatrix& C,
01051                            bool call_FillComplete_on_result)
01052 {
01053   //
01054   //This method forms the matrix-matrix product C = op(A) * op(B), where
01055   //op(A) == A   if transposeA is false,
01056   //op(A) == A^T if transposeA is true,
01057   //and similarly for op(B).
01058   //
01059 
01060   //A and B should already be Filled.
01061   //(Should we go ahead and call FillComplete() on them if necessary?
01062   // or error out? For now, we choose to error out.)
01063   if (!A.Filled() || !B.Filled()) {
01064     EPETRA_CHK_ERR(-1);
01065   }
01066 
01067   //We're going to refer to the different combinations of op(A) and op(B)
01068   //as scenario 1 through 4.
01069 
01070   int scenario = 1;//A*B
01071   if (transposeB && !transposeA) scenario = 2;//A*B^T
01072   if (transposeA && !transposeB) scenario = 3;//A^T*B
01073   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
01074 
01075   //now check size compatibility
01076   int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows();
01077   int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols();
01078   int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols();
01079   int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows();
01080   if (Ainner != Binner) {
01081     cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01082          << "must match for matrix-matrix product. op(A) is "
01083          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl;
01084     return(-1);
01085   }
01086 
01087   //The result matrix C must at least have a row-map that reflects the
01088   //correct row-size. Don't check the number of columns because rectangular
01089   //matrices which were constructed with only one map can still end up
01090   //having the correct capacity and dimensions when filled.
01091   if (Aouter > C.NumGlobalRows()) {
01092     cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01093          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows()
01094          << " rows, should have at least "<<Aouter << endl;
01095     return(-1);
01096   }
01097 
01098   //It doesn't matter whether C is already Filled or not. If it is already
01099   //Filled, it must have space allocated for the positions that will be
01100   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01101   //we'll error out later when trying to store result values.
01102 
01103   //We're going to need to import remotely-owned sections of A and/or B
01104   //if more than 1 processor is performing this run, depending on the scenario.
01105   int numProcs = A.Comm().NumProc();
01106 
01107   //If we are to use the transpose of A and/or B, we'll need to be able to 
01108   //access, on the local processor, all rows that contain column-indices in
01109   //the domain-map.
01110   const Epetra_Map* domainMap_A = &(A.DomainMap());
01111   const Epetra_Map* domainMap_B = &(B.DomainMap());
01112 
01113   const Epetra_Map* rowmap_A = &(A.RowMap());
01114   const Epetra_Map* rowmap_B = &(B.RowMap());
01115 
01116   //Declare some 'work-space' maps which may be created depending on
01117   //the scenario, and which will be deleted before exiting this function.
01118   const Epetra_Map* workmap1 = NULL;
01119   const Epetra_Map* workmap2 = NULL;
01120   const Epetra_Map* mapunion1 = NULL;
01121 
01122   //Declare a couple of structs that will be used to hold views of the data
01123   //of A and B, to be used for fast access during the matrix-multiplication.
01124   CrsMatrixStruct Aview;
01125   CrsMatrixStruct Bview;
01126 
01127   const Epetra_Map* targetMap_A = rowmap_A;
01128   const Epetra_Map* targetMap_B = rowmap_B;
01129 
01130   if (numProcs > 1) {
01131     //If op(A) = A^T, find all rows of A that contain column-indices in the
01132     //local portion of the domain-map. (We'll import any remote rows
01133     //that fit this criteria onto the local processor.)
01134     if (transposeA) {
01135       workmap1 = find_rows_containing_cols(A, domainMap_A);
01136       targetMap_A = workmap1;
01137     }
01138   }
01139 
01140   //Now import any needed remote rows and populate the Aview struct.
01141   EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) );
01142 
01143   //We will also need local access to all rows of B that correspond to the
01144   //column-map of op(A).
01145   if (numProcs > 1) {
01146     const Epetra_Map* colmap_op_A = NULL;
01147     if (transposeA) {
01148       colmap_op_A = targetMap_A;
01149     }
01150     else {
01151       colmap_op_A = &(A.ColMap());
01152     }
01153 
01154     targetMap_B = colmap_op_A;
01155 
01156     //If op(B) = B^T, find all rows of B that contain column-indices in the
01157     //local-portion of the domain-map, or in the column-map of op(A).
01158     //We'll import any remote rows that fit this criteria onto the
01159     //local processor.
01160     if (transposeB) {
01161       EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) );
01162       workmap2 = find_rows_containing_cols(B, mapunion1);
01163       targetMap_B = workmap2;
01164     }
01165   }
01166 
01167   //Now import any needed remote rows and populate the Bview struct.
01168   EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) );
01169 
01170   //If the result matrix C is not already FillComplete'd, we will do a
01171   //preprocessing step to create the nonzero structure, then call FillComplete,
01172   if (!C.Filled()) {
01173     CrsWrapper_GraphBuilder crsgraphbuilder(C.RowMap());
01174 
01175     //pass the graph-builder object to the multiplication kernel to fill in all
01176     //the nonzero positions that will be used in the result matrix.
01177     switch(scenario) {
01178     case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, crsgraphbuilder) );
01179       break;
01180     case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, crsgraphbuilder) );
01181       break;
01182     case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, crsgraphbuilder) );
01183       break;
01184     case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, crsgraphbuilder) );
01185       break;
01186     }
01187 
01188     //now insert all of the nonzero positions into the result matrix.
01189     insert_matrix_locations(crsgraphbuilder, C);
01190 
01191     if (call_FillComplete_on_result) {
01192       const Epetra_Map* domainmap =
01193         transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01194 
01195       const Epetra_Map* rangemap =
01196         transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01197 
01198       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01199       call_FillComplete_on_result = false;
01200     }
01201   }
01202 
01203   //Pre-zero the result matrix:
01204   C.PutScalar(0.0);
01205 
01206   //Now call the appropriate method to perform the actual multiplication.
01207 
01208   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01209 
01210   switch(scenario) {
01211   case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, ecrsmat) );
01212     break;
01213   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
01214     break;
01215   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
01216     break;
01217   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
01218     break;
01219   }
01220 
01221   if (call_FillComplete_on_result) {
01222     //We'll call FillComplete on the C matrix before we exit, and give
01223     //it a domain-map and a range-map.
01224     //The domain-map will be the domain-map of B, unless
01225     //op(B)==transpose(B), in which case the range-map of B will be used.
01226     //The range-map will be the range-map of A, unless
01227     //op(A)==transpose(A), in which case the domain-map of A will be used.
01228 
01229     const Epetra_Map* domainmap =
01230       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01231 
01232     const Epetra_Map* rangemap =
01233       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01234 
01235     if (!C.Filled()) {
01236       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01237     }
01238   }
01239 
01240   //Finally, delete the objects that were potentially created
01241   //during the course of importing remote sections of A and B.
01242 
01243   delete mapunion1; mapunion1 = NULL;
01244   delete workmap1; workmap1 = NULL;
01245   delete workmap2; workmap2 = NULL;
01246 
01247   return(0);
01248 }
01249 
01250 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01251                       bool transposeA,
01252                       double scalarA,
01253                       Epetra_CrsMatrix& B,
01254                       double scalarB )
01255 {
01256   //
01257   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01258 
01259   //A should already be Filled. It doesn't matter whether B is
01260   //already Filled, but if it is, then its graph must already contain
01261   //all nonzero locations that will be referenced in forming the
01262   //sum.
01263 
01264   if (!A.Filled() ) {
01265     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01266     EPETRA_CHK_ERR(-1);
01267   }
01268 
01269   //explicit tranpose A formed as necessary
01270   Epetra_CrsMatrix * Aprime = 0;
01271   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01272   if( transposeA )
01273   {
01274     Atrans = new EpetraExt::RowMatrix_Transpose();
01275     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01276   }
01277   else
01278     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01279 
01280   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01281   int A_NumEntries, B_NumEntries;
01282   int * A_Indices = new int[MaxNumEntries];
01283   double * A_Values = new double[MaxNumEntries];
01284   int* B_Indices;
01285   double* B_Values;
01286 
01287   int NumMyRows = B.NumMyRows();
01288   int Row, err;
01289   int ierr = 0;
01290 
01291   if( scalarA )
01292   {
01293     //Loop over B's rows and sum into
01294     for( int i = 0; i < NumMyRows; ++i )
01295     {
01296       Row = B.GRID(i);
01297       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01298 
01299       if (scalarB != 1.0) {
01300         if (!B.Filled()) {
01301           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01302                                                   B_Values, B_Indices));
01303         }
01304         else {
01305           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01306                                               B_Values, B_Indices));
01307         }
01308 
01309         for(int jj=0; jj<B_NumEntries; ++jj) {
01310           B_Values[jj] = scalarB*B_Values[jj];
01311         }
01312       }
01313 
01314       if( scalarA != 1.0 ) {
01315         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01316       }
01317 
01318       if( B.Filled() ) {//Sum In Values
01319         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01320         assert( err >= 0 );
01321         if (err < 0) ierr = err;
01322       }
01323       else {
01324         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01325         assert( err == 0 || err == 1 || err == 3 );
01326         if (err < 0) ierr = err;
01327       }
01328     }
01329   }
01330   else {
01331     EPETRA_CHK_ERR( B.Scale(scalarB) );
01332   }
01333 
01334   delete [] A_Indices;
01335   delete [] A_Values;
01336 
01337   if( Atrans ) delete Atrans;
01338 
01339   return(ierr);
01340 }
01341 
01342 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01343                       bool transposeA,
01344                       double scalarA,
01345                       const Epetra_CrsMatrix & B,
01346                       bool transposeB,
01347                       double scalarB,
01348                       Epetra_CrsMatrix * & C)
01349 {
01350   //
01351   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
01352 
01353   //A and B should already be Filled. C should be an empty pointer.
01354 
01355   if (!A.Filled() || !B.Filled() ) {
01356      std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
01357                << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
01358      EPETRA_CHK_ERR(-1);
01359   }
01360 
01361   Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
01362   EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
01363 
01364   //explicit tranpose A formed as necessary
01365   if( transposeA ) {
01366      Atrans = new EpetraExt::RowMatrix_Transpose();
01367      Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01368   }
01369   else
01370      Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01371 
01372   //explicit tranpose B formed as necessary
01373   if( transposeB ) {
01374      Btrans = new EpetraExt::RowMatrix_Transpose();
01375      Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
01376   }
01377   else
01378      Bprime = const_cast<Epetra_CrsMatrix*>(&B);
01379 
01380   // allocate or zero the new matrix
01381   if(C!=0)
01382      C->PutScalar(0.0);
01383   else
01384      C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
01385 
01386   // build arrays  for easy resuse
01387   int ierr = 0;
01388   Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
01389   double scalar[] = { scalarA, scalarB};
01390 
01391   // do a loop over each matrix to add: A reordering might be more efficient
01392   for(int k=0;k<2;k++) {
01393      int MaxNumEntries = Mat[k]->MaxNumEntries();
01394      int NumEntries;
01395      int * Indices = new int[MaxNumEntries];
01396      double * Values = new double[MaxNumEntries];
01397    
01398      int NumMyRows = Mat[k]->NumMyRows();
01399      int Row, err;
01400      int ierr = 0;
01401    
01402      //Loop over rows and sum into C
01403      for( int i = 0; i < NumMyRows; ++i ) {
01404         Row = Mat[k]->GRID(i);
01405         EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
01406    
01407         if( scalar[k] != 1.0 )
01408            for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
01409    
01410         if(C->Filled()) { // Sum in values
01411            err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
01412            if (err < 0) ierr = err;
01413         } else { // just add it to the unfilled CRS Matrix
01414            err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
01415            if (err < 0) ierr = err;
01416         }
01417      }
01418 
01419      delete [] Indices;
01420      delete [] Values;
01421   }
01422 
01423   if( Atrans ) delete Atrans;
01424   if( Btrans ) delete Btrans;
01425 
01426   return(ierr);
01427 }
01428 
01429 } // namespace EpetraExt
01430 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines