EpetraExt_MatrixMatrix.cpp

Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 // 
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2001) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 
00029 #include <EpetraExt_ConfigDefs.h>
00030 #include <EpetraExt_MatrixMatrix.h>
00031 
00032 #include <EpetraExt_Transpose_RowMatrix.h>
00033 
00034 #include <Epetra_Export.h>
00035 #include <Epetra_Import.h>
00036 #include <Epetra_Util.h>
00037 #include <Epetra_Map.h>
00038 #include <Epetra_Comm.h>
00039 #include <Epetra_CrsMatrix.h>
00040 #include <Epetra_Directory.h>
00041 #include <Epetra_HashTable.h>
00042 #include <Epetra_Distributor.h>
00043 
00044 #ifdef HAVE_VECTOR
00045 #include <vector>
00046 #endif
00047 
00048 namespace EpetraExt {
00049 
00050 //
00051 //Method for internal use... sparsedot forms a dot-product between two
00052 //sparsely-populated 'vectors'.
00053 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00054 //
00055 double sparsedot(double* u, int* u_ind, int u_len,
00056      double* v, int* v_ind, int v_len)
00057 {
00058   double result = 0.0;
00059 
00060   int v_idx = 0;
00061   int u_idx = 0;
00062 
00063   while(v_idx < v_len && u_idx < u_len) {
00064     int ui = u_ind[u_idx];
00065     int vi = v_ind[v_idx];
00066 
00067     if (ui < vi) {
00068       ++u_idx;
00069     }
00070     else if (ui > vi) {
00071       ++v_idx;
00072     }
00073     else {
00074       result += u[u_idx++]*v[v_idx++];
00075     }
00076   }
00077 
00078   return(result);
00079 }
00080 
00081 //struct that holds views of the contents of a CrsMatrix. These
00082 //contents may be a mixture of local and remote rows of the
00083 //original matrix. 
00084 class CrsMatrixStruct {
00085 public:
00086   CrsMatrixStruct()
00087     : numRows(0), numEntriesPerRow(NULL), indices(NULL), values(NULL),
00088       remote(NULL), numRemote(0), rowMap(NULL), colMap(NULL),
00089       domainMap(NULL), importColMap(NULL), importMatrix(NULL)
00090   {}
00091 
00092   virtual ~CrsMatrixStruct()
00093   {
00094     deleteContents();
00095   }
00096 
00097   void deleteContents()
00098   {
00099     numRows = 0;
00100     delete [] numEntriesPerRow; numEntriesPerRow = NULL;
00101     delete [] indices; indices = NULL;
00102     delete [] values; values = NULL;
00103     delete [] remote; remote = NULL;
00104     numRemote = 0;
00105     delete importMatrix;
00106   }
00107 
00108   int numRows;
00109   int* numEntriesPerRow;
00110   int** indices;
00111   double** values;
00112   bool* remote;
00113   int numRemote;
00114   const Epetra_Map* origRowMap;
00115   const Epetra_Map* rowMap;
00116   const Epetra_Map* colMap;
00117   const Epetra_Map* domainMap;
00118   const Epetra_Map* importColMap;
00119   Epetra_CrsMatrix* importMatrix;
00120 };
00121 
00122 int dumpCrsMatrixStruct(const CrsMatrixStruct& M)
00123 {
00124   cout << "proc " << M.rowMap->Comm().MyPID()<<endl;
00125   cout << "numRows: " << M.numRows<<endl;
00126   for(int i=0; i<M.numRows; ++i) {
00127     for(int j=0; j<M.numEntriesPerRow[i]; ++j) {
00128       if (M.remote[i]) {
00129   cout << "  *"<<M.rowMap->GID(i)<<"   "
00130        <<M.importColMap->GID(M.indices[i][j])<<"   "<<M.values[i][j]<<endl;
00131       }
00132       else {
00133   cout << "   "<<M.rowMap->GID(i)<<"   "
00134        <<M.colMap->GID(M.indices[i][j])<<"   "<<M.values[i][j]<<endl;
00135       }
00136     }
00137   }
00138   return(0);
00139 }
00140 
00141 //kernel method for computing the local portion of C = A*B
00142 int mult_A_B(CrsMatrixStruct& Aview,
00143        CrsMatrixStruct& Bview,
00144        Epetra_CrsMatrix& C)
00145 {
00146   int C_firstCol = Bview.colMap->MinLID();
00147   int C_lastCol = Bview.colMap->MaxLID();
00148 
00149   int C_firstCol_import = 0;
00150   int C_lastCol_import = -1;
00151 
00152   int* bcols = Bview.colMap->MyGlobalElements();
00153   int* bcols_import = NULL;
00154   if (Bview.importColMap != NULL) {
00155     C_firstCol_import = Bview.importColMap->MinLID();
00156     C_lastCol_import = Bview.importColMap->MaxLID();
00157 
00158     bcols_import = Bview.importColMap->MyGlobalElements();
00159   }
00160 
00161   int C_numCols = C_lastCol - C_firstCol + 1;
00162   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00163 
00164   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00165   double* dwork = new double[C_numCols];
00166   int* iwork = new int[C_numCols];
00167 
00168   double* C_row_i = dwork;
00169   int* C_cols = iwork;
00170 
00171   int C_row_i_length, i, j, k;
00172 
00173   //To form C = A*B we're going to execute this expression:
00174   //
00175   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00176   //
00177   //Our goal, of course, is to navigate the data in A and B once, without
00178   //performing searches for column-indices, etc.
00179 
00180   bool C_filled = C.Filled();
00181 
00182   //loop over the rows of A.
00183   for(i=0; i<Aview.numRows; ++i) {
00184 
00185     //only navigate the local portion of Aview... (It's probable that we
00186     //imported more of A than we need for A*B, because other cases like A^T*B 
00187     //need the extra rows.)
00188     if (Aview.remote[i]) {
00189       continue;
00190     }
00191 
00192     int* Aindices_i = Aview.indices[i];
00193     double* Aval_i  = Aview.values[i];
00194 
00195     int global_row = Aview.rowMap->GID(i);
00196 
00197     //loop across the i-th row of A and for each corresponding row
00198     //in B, loop across colums and accumulate product
00199     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00200     //as we stride across B(k,:) we're calculating updates for row i of the
00201     //result matrix C.
00202 
00203     for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00204       int Ak = Bview.rowMap->LID(Aview.colMap->GID(Aindices_i[k]));
00205       double Aval = Aval_i[k];
00206 
00207       int* Bcol_inds = Bview.indices[Ak];
00208       double* Bvals_k = Bview.values[Ak];
00209 
00210       C_row_i_length = 0;
00211 
00212       if (Bview.remote[Ak]) {
00213   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00214     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00215           C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]];
00216   }
00217       }
00218       else {
00219   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00220     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00221           C_cols[C_row_i_length++] = bcols[Bcol_inds[j]];
00222   }
00223       }
00224 
00225       //
00226       //Now put the C_row_i values into C.
00227       //
00228 
00229       int err = C_filled ?
00230           C.SumIntoGlobalValues(global_row, C_row_i_length, C_row_i, C_cols)
00231           :
00232           C.InsertGlobalValues(global_row, C_row_i_length, C_row_i, C_cols);
00233  
00234       if (err < 0) {
00235         return(err);
00236       }
00237       if (err > 0) {
00238         if (C_filled) {
00239           //C is Filled, and doesn't
00240           //have all the necessary nonzero locations.
00241           return(err);
00242         }
00243       }
00244     }
00245   }
00246 
00247   delete [] dwork;
00248   delete [] iwork;
00249 
00250   return(0);
00251 }
00252 
00253 //kernel method for computing the local portion of C = A*B^T
00254 int mult_A_Btrans(CrsMatrixStruct& Aview,
00255       CrsMatrixStruct& Bview,
00256       Epetra_CrsMatrix& C)
00257 {
00258   int i, j, k;
00259   int returnValue = 0;
00260 
00261   int maxlen = 0;
00262   for(i=0; i<Aview.numRows; ++i) {
00263     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00264   }
00265   for(i=0; i<Bview.numRows; ++i) {
00266     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00267   }
00268 
00269   //cout << "Aview: " << endl;
00270   //dumpCrsMatrixStruct(Aview);
00271 
00272   //cout << "Bview: " << endl;
00273   //dumpCrsMatrixStruct(Bview);
00274 
00275   int numBcols = Bview.colMap->NumMyElements();
00276   int numBrows = Bview.numRows;
00277 
00278   int iworklen = maxlen*2 + numBcols;
00279   int* iwork = new int[iworklen];
00280 
00281   int* bcols = iwork+maxlen*2;
00282   int* bgids = Bview.colMap->MyGlobalElements();
00283   double* bvals = new double[maxlen*2];
00284   double* avals = bvals+maxlen;
00285 
00286   int max_all_b = Bview.colMap->MaxAllGID();
00287   int min_all_b = Bview.colMap->MinAllGID();
00288 
00289   //bcols will hold the GIDs from B's column-map for fast access
00290   //during the computations below
00291   for(i=0; i<numBcols; ++i) {
00292     int blid = Bview.colMap->LID(bgids[i]);
00293     bcols[blid] = bgids[i];
00294   }
00295 
00296   //next create arrays indicating the first and last column-index in
00297   //each row of B, so that we can know when to skip certain rows below.
00298   //This will provide a large performance gain for banded matrices, and
00299   //a somewhat smaller gain for *most* other matrices.
00300   int* b_firstcol = new int[2*numBrows];
00301   int* b_lastcol = b_firstcol+numBrows;
00302   int temp;
00303   for(i=0; i<numBrows; ++i) {
00304     b_firstcol[i] = max_all_b;
00305     b_lastcol[i] = min_all_b;
00306 
00307     int Blen_i = Bview.numEntriesPerRow[i];
00308     if (Blen_i < 1) continue;
00309     int* Bindices_i = Bview.indices[i];
00310 
00311     if (Bview.remote[i]) {
00312       for(k=0; k<Blen_i; ++k) {
00313         temp = Bview.importColMap->GID(Bindices_i[k]);
00314         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00315         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00316       }
00317     }
00318     else {
00319       for(k=0; k<Blen_i; ++k) {
00320         temp = bcols[Bindices_i[k]];
00321         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00322         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00323       }
00324     }
00325   }
00326 
00327   Epetra_Util util;
00328 
00329   int* Aind = iwork;
00330   int* Bind = iwork+maxlen;
00331 
00332   bool C_filled = C.Filled();
00333 
00334   //To form C = A*B^T, we're going to execute this expression:
00335   //
00336   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00337   //
00338   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00339   //But it requires the use of a 'sparsedot' function (we're simply forming
00340   //dot-products with row A_i and row B_j for all i and j).
00341 
00342   //loop over the rows of A.
00343   for(i=0; i<Aview.numRows; ++i) {
00344     if (Aview.remote[i]) {
00345       continue;
00346     }
00347 
00348     int* Aindices_i = Aview.indices[i];
00349     double* Aval_i  = Aview.values[i];
00350     int A_len_i = Aview.numEntriesPerRow[i];
00351     if (A_len_i < 1) {
00352       continue;
00353     }
00354 
00355     for(k=0; k<A_len_i; ++k) {
00356       Aind[k] = Aview.colMap->GID(Aindices_i[k]);
00357       avals[k] = Aval_i[k];
00358     }
00359 
00360     util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL);
00361 
00362     int mina = Aind[0];
00363     int maxa = Aind[A_len_i-1];
00364 
00365     if (mina > max_all_b || maxa < min_all_b) {
00366       continue;
00367     }
00368 
00369     int global_row = Aview.rowMap->GID(i);
00370 
00371     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00372     for(j=0; j<Bview.numRows; ++j) {
00373       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00374         continue;
00375       }
00376 
00377       int* Bindices_j = Bview.indices[j];
00378       int B_len_j = Bview.numEntriesPerRow[j];
00379       if (B_len_j < 1) {
00380         continue;
00381       }
00382 
00383       int tmp, Blen = 0;
00384 
00385       if (Bview.remote[j]) {
00386         for(k=0; k<B_len_j; ++k) {
00387     tmp = Bview.importColMap->GID(Bindices_j[k]);
00388           if (tmp < mina || tmp > maxa) {
00389             continue;
00390           }
00391 
00392           bvals[Blen] = Bview.values[j][k];
00393           Bind[Blen++] = tmp;
00394   }
00395       }
00396       else {
00397         for(k=0; k<B_len_j; ++k) {
00398     tmp = bcols[Bindices_j[k]];
00399           if (tmp < mina || tmp > maxa) {
00400             continue;
00401           }
00402 
00403           bvals[Blen] = Bview.values[j][k];
00404           Bind[Blen++] = tmp;
00405   }
00406       }
00407 
00408       if (Blen < 1) {
00409         continue;
00410       }
00411 
00412       util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL);
00413 
00414       double C_ij = sparsedot(avals, Aind, A_len_i,
00415             bvals, Bind, Blen);
00416 
00417       if (C_ij == 0.0) {
00418   continue;
00419       }
00420       int global_col = Bview.rowMap->GID(j);
00421 
00422       int err = C_filled ?
00423         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00424         :
00425         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00426 
00427       if (err < 0) {
00428   return(err);
00429       }
00430       if (err > 0) {
00431   if (C_filled) {
00432     //C.Filled()==true, and C doesn't have all the necessary nonzero
00433           //locations, or global_row or global_col is out of range (less
00434           //than 0 or non local).
00435           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00436               << "to insert value in result matrix at position "<<global_row
00437              <<"," <<global_col<<", possibly because result matrix has a "
00438              << "column-map that doesn't include column "<<global_col
00439              <<" on this proc." <<std::endl;
00440     return(err);
00441   }
00442       }
00443     }
00444   }
00445 
00446   delete [] iwork;
00447   delete [] bvals;
00448   delete [] b_firstcol;
00449 
00450   return(returnValue);
00451 }
00452 
00453 //kernel method for computing the local portion of C = A^T*B
00454 int mult_Atrans_B(CrsMatrixStruct& Aview,
00455       CrsMatrixStruct& Bview,
00456       Epetra_CrsMatrix& C)
00457 {
00458   int C_firstCol = Bview.colMap->MinLID();
00459   int C_lastCol = Bview.colMap->MaxLID();
00460 
00461   int C_firstCol_import = 0;
00462   int C_lastCol_import = -1;
00463 
00464   if (Bview.importColMap != NULL) {
00465     C_firstCol_import = Bview.importColMap->MinLID();
00466     C_lastCol_import = Bview.importColMap->MaxLID();
00467   }
00468 
00469   int C_numCols = C_lastCol - C_firstCol + 1;
00470   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00471 
00472   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00473 
00474   double* C_row_i = new double[C_numCols];
00475   int* C_colInds = new int[C_numCols];
00476 
00477   int i, j, k;
00478 
00479   for(j=0; j<C_numCols; ++j) {
00480     C_row_i[j] = 0.0;
00481     C_colInds[j] = 0;
00482   }
00483 
00484   //To form C = A^T*B, compute a series of outer-product updates.
00485   //
00486   // for (ith column of A^T) { 
00487   //   C_i = outer product of A^T(:,i) and B(i,:)
00488   // Where C_i is the ith matrix update,
00489   //       A^T(:,i) is the ith column of A^T, and
00490   //       B(i,:) is the ith row of B.
00491   //
00492 
00493   //dumpCrsMatrixStruct(Aview);
00494   //dumpCrsMatrixStruct(Bview);
00495   int localProc = Bview.colMap->Comm().MyPID();
00496 
00497   int* Arows = Aview.rowMap->MyGlobalElements();
00498 
00499   bool C_filled = C.Filled();
00500 
00501   //loop over the rows of A (which are the columns of A^T).
00502   for(i=0; i<Aview.numRows; ++i) {
00503 
00504     int* Aindices_i = Aview.indices[i];
00505     double* Aval_i  = Aview.values[i];
00506 
00507     //we'll need to get the row of B corresponding to Arows[i],
00508     //where Arows[i] is the GID of A's ith row.
00509     int Bi = Bview.rowMap->LID(Arows[i]);
00510     if (Bi<0) {
00511       cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00512      <<Arows[i]<<" of matrix B, but doesn't have it."<<endl;
00513       return(-1);
00514     }
00515 
00516     int* Bcol_inds = Bview.indices[Bi];
00517     double* Bvals_i = Bview.values[Bi];
00518 
00519     //for each column-index Aj in the i-th row of A, we'll update
00520     //global-row GID(Aj) of the result matrix C. In that row of C,
00521     //we'll update column-indices given by the column-indices in the
00522     //ith row of B that we're now holding (Bcol_inds).
00523 
00524     //First create a list of GIDs for the column-indices
00525     //that we'll be updating.
00526 
00527     int Blen = Bview.numEntriesPerRow[Bi];
00528     if (Bview.remote[Bi]) {
00529       for(j=0; j<Blen; ++j) {
00530         C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]);
00531       }
00532     }
00533     else {
00534       for(j=0; j<Blen; ++j) {
00535         C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]);
00536       }
00537     }
00538 
00539     //loop across the i-th row of A (column of A^T)
00540     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00541 
00542       int Aj = Aindices_i[j];
00543       double Aval = Aval_i[j];
00544 
00545       int global_row;
00546       if (Aview.remote[i]) {
00547   global_row = Aview.importColMap->GID(Aj);
00548       }
00549       else {
00550   global_row = Aview.colMap->GID(Aj);
00551       }
00552 
00553       if (!C.RowMap().MyGID(global_row)) {
00554         continue;
00555       }
00556 
00557       for(k=0; k<Blen; ++k) {
00558         C_row_i[k] = Aval*Bvals_i[k];
00559       }
00560 
00561       //
00562       //Now add this row-update to C.
00563       //
00564 
00565       int err = C_filled ?
00566         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00567         :
00568         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00569 
00570       if (err < 0) {
00571         return(err);
00572       }
00573       if (err > 0) {
00574         if (C_filled) {
00575           //C is Filled, and doesn't have all the necessary nonzero locations.
00576           return(err);
00577         }
00578       }
00579     }
00580   }
00581 
00582   delete [] C_row_i;
00583   delete [] C_colInds;
00584 
00585   return(0);
00586 }
00587 
00588 //kernel method for computing the local portion of C = A^T*B^T
00589 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00590            CrsMatrixStruct& Bview,
00591            Epetra_CrsMatrix& C)
00592 {
00593   int C_firstCol = Aview.rowMap->MinLID();
00594   int C_lastCol = Aview.rowMap->MaxLID();
00595 
00596   int C_firstCol_import = 0;
00597   int C_lastCol_import = -1;
00598 
00599   if (Aview.importColMap != NULL) {
00600     C_firstCol_import = Aview.importColMap->MinLID();
00601     C_lastCol_import = Aview.importColMap->MaxLID();
00602   }
00603 
00604   int C_numCols = C_lastCol - C_firstCol + 1;
00605   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00606 
00607   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00608 
00609   double* dwork = new double[C_numCols];
00610   int* iwork = new int[C_numCols];
00611 
00612   double* C_col_j = dwork;
00613   int* C_inds = iwork;
00614 
00615   //cout << "Aview: " << endl;
00616   //dumpCrsMatrixStruct(Aview);
00617 
00618   //cout << "Bview: " << endl;
00619   //dumpCrsMatrixStruct(Bview);
00620 
00621 
00622   int i, j, k;
00623 
00624   for(j=0; j<C_numCols; ++j) {
00625     C_col_j[j] = 0.0;
00626     C_inds[j] = -1;
00627   }
00628 
00629   int* A_col_inds = Aview.colMap->MyGlobalElements();
00630   int* A_col_inds_import = Aview.importColMap ?
00631     Aview.importColMap->MyGlobalElements() : 0;
00632 
00633   const Epetra_Map* Crowmap = &(C.RowMap());
00634 
00635   //To form C = A^T*B^T, we're going to execute this expression:
00636   //
00637   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00638   //
00639   //Our goal, of course, is to navigate the data in A and B once, without
00640   //performing searches for column-indices, etc. In other words, we avoid
00641   //column-wise operations like the plague...
00642 
00643   int* Brows = Bview.rowMap->MyGlobalElements();
00644 
00645   //loop over the rows of B
00646   for(j=0; j<Bview.numRows; ++j) {
00647     int* Bindices_j = Bview.indices[j];
00648     double* Bvals_j = Bview.values[j];
00649 
00650     int global_col = Brows[j];
00651 
00652     //loop across columns in the j-th row of B and for each corresponding
00653     //row in A, loop across columns and accumulate product
00654     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00655     //words, as we stride across B(j,:), we use selected rows in A to
00656     //calculate updates for column j of the result matrix C.
00657 
00658     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00659 
00660       int bk = Bindices_j[k];
00661       double Bval = Bvals_j[k];
00662 
00663       int global_k;
00664       if (Bview.remote[j]) {
00665   global_k = Bview.importColMap->GID(bk);
00666       }
00667       else {
00668   global_k = Bview.colMap->GID(bk);
00669       }
00670 
00671       //get the corresponding row in A
00672       int ak = Aview.rowMap->LID(global_k);
00673       if (ak<0) {
00674   continue;
00675       }
00676 
00677       int* Aindices_k = Aview.indices[ak];
00678       double* Avals_k = Aview.values[ak];
00679 
00680       int C_len = 0;
00681 
00682       if (Aview.remote[ak]) {
00683   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00684     C_col_j[C_len] = Avals_k[i]*Bval;
00685           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00686   }
00687       }
00688       else {
00689   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00690     C_col_j[C_len] = Avals_k[i]*Bval;
00691           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00692   }
00693       }
00694 
00695       //Now loop across the C_col_j values and put non-zeros into C.
00696 
00697       for(i=0; i < C_len ; ++i) {
00698   if (C_col_j[i] == 0.0) continue;
00699 
00700   int global_row = C_inds[i];
00701   if (!Crowmap->MyGID(global_row)) {
00702     continue;
00703   }
00704 
00705   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00706 
00707   if (err < 0) {
00708     return(err);
00709   }
00710   else {
00711           if (err > 0) {
00712       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00713       if (err < 0) {
00714               return(err);
00715             }
00716     }
00717   }
00718       }
00719 
00720     }
00721   }
00722 
00723   delete [] dwork;
00724   delete [] iwork;
00725 
00726   return(0);
00727 }
00728 
00729 int import_and_extract_views(const Epetra_CrsMatrix& M,
00730            const Epetra_Map& targetMap,
00731            CrsMatrixStruct& Mview)
00732 {
00733   //The goal of this method is to populate the 'Mview' struct with views of the
00734   //rows of M, including all rows that correspond to elements in 'targetMap'.
00735   //
00736   //If targetMap includes local elements that correspond to remotely-owned rows
00737   //of M, then those remotely-owned rows will be imported into
00738   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00739 
00740   Mview.deleteContents();
00741 
00742   const Epetra_Map& Mrowmap = M.RowMap();
00743 
00744   int numProcs = Mrowmap.Comm().NumProc();
00745 
00746   Mview.numRows = targetMap.NumMyElements();
00747 
00748   int* Mrows = targetMap.MyGlobalElements();
00749 
00750   if (Mview.numRows > 0) {
00751     Mview.numEntriesPerRow = new int[Mview.numRows];
00752     Mview.indices = new int*[Mview.numRows];
00753     Mview.values = new double*[Mview.numRows];
00754     Mview.remote = new bool[Mview.numRows];
00755   }
00756 
00757   Mview.numRemote = 0;
00758 
00759   int i;
00760   for(i=0; i<Mview.numRows; ++i) {
00761     int mlid = Mrowmap.LID(Mrows[i]);
00762     if (mlid < 0) {
00763       Mview.remote[i] = true;
00764       ++Mview.numRemote;
00765     }
00766     else {
00767       EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i],
00768            Mview.values[i], Mview.indices[i]) );
00769       Mview.remote[i] = false;
00770     }
00771   }
00772 
00773   Mview.origRowMap = &(M.RowMap());
00774   Mview.rowMap = &targetMap;
00775   Mview.colMap = &(M.ColMap());
00776   Mview.domainMap = &(M.DomainMap());
00777   Mview.importColMap = NULL;
00778 
00779   if (numProcs < 2) {
00780     if (Mview.numRemote > 0) {
00781       cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00782      << "attempting to import remote matrix rows."<<endl;
00783       return(-1);
00784     }
00785 
00786     //If only one processor we don't need to import any remote rows, so return.
00787     return(0);
00788   }
00789 
00790   //
00791   //Now we will import the needed remote rows of M, if the global maximum
00792   //value of numRemote is greater than 0.
00793   //
00794 
00795   int globalMaxNumRemote = 0;
00796   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00797 
00798   if (globalMaxNumRemote > 0) {
00799     //Create a map that describes the remote rows of M that we need.
00800 
00801     int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL;
00802     int offset = 0;
00803     for(i=0; i<Mview.numRows; ++i) {
00804       if (Mview.remote[i]) {
00805   MremoteRows[offset++] = Mrows[i];
00806       }
00807     }
00808 
00809     Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows,
00810            Mrowmap.IndexBase(), Mrowmap.Comm());
00811 
00812     //Create an importer with target-map MremoteRowMap and 
00813     //source-map Mrowmap.
00814     Epetra_Import importer(MremoteRowMap, Mrowmap);
00815 
00816     //Now create a new matrix into which we can import the remote rows of M
00817     //that we need.
00818     Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1);
00819 
00820     EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) );
00821 
00822     EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) );
00823 
00824     //Finally, use the freshly imported data to fill in the gaps in our views
00825     //of rows of M.
00826     for(i=0; i<Mview.numRows; ++i) {
00827       if (Mview.remote[i]) {
00828   int importLID = MremoteRowMap.LID(Mrows[i]);
00829   EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID,
00830               Mview.numEntriesPerRow[i],
00831               Mview.values[i],
00832               Mview.indices[i]) );
00833       }
00834     }
00835 
00836     Mview.importColMap = &(Mview.importMatrix->ColMap());
00837 
00838     delete [] MremoteRows;
00839   }
00840 
00841   return(0);
00842 }
00843 
00844 int distribute_list(const Epetra_Comm& Comm,
00845                     int lenSendList,
00846                     const int* sendList,
00847                     int& maxSendLen,
00848                     int*& recvList)
00849 {
00850   maxSendLen = 0; 
00851   Comm.MaxAll(&lenSendList, &maxSendLen, 1);
00852   int numProcs = Comm.NumProc();
00853   recvList = new int[numProcs*maxSendLen];
00854   int* send = new int[maxSendLen];
00855   for(int i=0; i<lenSendList; ++i) {
00856     send[i] = sendList[i];
00857   }
00858 
00859   Comm.GatherAll(send, recvList, maxSendLen);
00860   delete [] send;
00861 
00862   return(0);
00863 }
00864 
00865 Epetra_Map* create_map_from_imported_rows(const Epetra_Map* map,
00866             int totalNumSend,
00867             int* sendRows,
00868             int numProcs,
00869             int* numSendPerProc)
00870 {
00871   //Perform sparse all-to-all communication to send the row-GIDs
00872   //in sendRows to appropriate processors according to offset
00873   //information in numSendPerProc.
00874   //Then create and return a map containing the rows that we
00875   //received on the local processor.
00876 
00877   Epetra_Distributor* distributor = map->Comm().CreateDistributor();
00878 
00879   int* sendPIDs = totalNumSend>0 ? new int[totalNumSend] : NULL;
00880   int offset = 0;
00881   for(int i=0; i<numProcs; ++i) {
00882     for(int j=0; j<numSendPerProc[i]; ++j) {
00883       sendPIDs[offset++] = i;
00884     }
00885   }
00886 
00887   int numRecv = 0;
00888   int err = distributor->CreateFromSends(totalNumSend, sendPIDs,
00889            true, numRecv);
00890   assert( err == 0 );
00891 
00892   char* c_recv_objs = numRecv>0 ? new char[numRecv*sizeof(int)] : NULL;
00893   int num_c_recv = numRecv*(int)sizeof(int);
00894 
00895   err = distributor->Do(reinterpret_cast<char*>(sendRows),
00896       (int)sizeof(int), num_c_recv, c_recv_objs);
00897   assert( err == 0 );
00898 
00899   int* recvRows = reinterpret_cast<int*>(c_recv_objs);
00900 
00901   //Now create a map with the rows we've received from other processors.
00902   Epetra_Map* import_rows = new Epetra_Map(-1, numRecv, recvRows,
00903              map->IndexBase(), map->Comm());
00904 
00905   delete [] c_recv_objs;
00906   delete [] sendPIDs;
00907 
00908   delete distributor;
00909 
00910   return( import_rows );
00911 }
00912 
00913 int form_map_union(const Epetra_Map* map1,
00914        const Epetra_Map* map2,
00915        const Epetra_Map*& mapunion)
00916 {
00917   //form the union of two maps
00918 
00919   if (map1 == NULL) {
00920     mapunion = new Epetra_Map(*map2);
00921     return(0);
00922   }
00923 
00924   if (map2 == NULL) {
00925     mapunion = new Epetra_Map(*map1);
00926     return(0);
00927   }
00928 
00929   int map1_len       = map1->NumMyElements();
00930   int* map1_elements = map1->MyGlobalElements();
00931   int map2_len       = map2->NumMyElements();
00932   int* map2_elements = map2->MyGlobalElements();
00933 
00934   int* union_elements = new int[map1_len+map2_len];
00935 
00936   int map1_offset = 0, map2_offset = 0, union_offset = 0;
00937 
00938   while(map1_offset < map1_len && map2_offset < map2_len) {
00939     int map1_elem = map1_elements[map1_offset];
00940     int map2_elem = map2_elements[map2_offset];
00941 
00942     if (map1_elem < map2_elem) {
00943       union_elements[union_offset++] = map1_elem;
00944       ++map1_offset;
00945     }
00946     else if (map1_elem > map2_elem) {
00947       union_elements[union_offset++] = map2_elem;
00948       ++map2_offset;
00949     }
00950     else {
00951       union_elements[union_offset++] = map1_elem;
00952       ++map1_offset;
00953       ++map2_offset;
00954     }
00955   }
00956 
00957   int i;
00958   for(i=map1_offset; i<map1_len; ++i) {
00959     union_elements[union_offset++] = map1_elements[i];
00960   }
00961 
00962   for(i=map2_offset; i<map2_len; ++i) {
00963     union_elements[union_offset++] = map2_elements[i];
00964   }
00965 
00966   mapunion = new Epetra_Map(-1, union_offset, union_elements,
00967           map1->IndexBase(), map1->Comm());
00968 
00969   delete [] union_elements;
00970 
00971   return(0);
00972 }
00973 
00974 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
00975                                       const Epetra_Map* colmap)
00976 {
00977   //The goal of this function is to find all rows in the matrix M that contain
00978   //column-indices which are in 'colmap'. A map containing those rows is
00979   //returned.
00980 
00981   int numProcs = colmap->Comm().NumProc();
00982   int localProc = colmap->Comm().MyPID();
00983 
00984   if (numProcs < 2) {
00985     Epetra_Map* result_map = NULL;
00986 
00987     int err = form_map_union(&(M.RowMap()), NULL,
00988                              (const Epetra_Map*&)result_map);
00989     if (err != 0) {
00990       return(NULL);
00991     }
00992     return(result_map);
00993   }
00994 
00995   int MnumRows = M.NumMyRows();
00996   int numCols = colmap->NumMyElements();
00997 
00998   int* iwork = new int[numCols+2*numProcs+numProcs*MnumRows];
00999   int iworkOffset = 0;
01000 
01001   int* cols = &(iwork[iworkOffset]); iworkOffset += numCols;
01002 
01003   cols[0] = numCols;
01004   colmap->MyGlobalElements( &(cols[1]) );
01005 
01006   //cols are not necessarily sorted at this point, so we'll make sure
01007   //they are sorted.
01008   Epetra_Util util;
01009   util.Sort(true, numCols, &(cols[1]), 0, NULL, 0, NULL);
01010 
01011   int* all_proc_cols = NULL;
01012   
01013   int max_num_cols;
01014   distribute_list(colmap->Comm(), numCols+1, cols, max_num_cols, all_proc_cols);
01015 
01016   const Epetra_CrsGraph& Mgraph = M.Graph();
01017   const Epetra_Map& Mrowmap = M.RowMap();
01018   const Epetra_Map& Mcolmap = M.ColMap();
01019   int MminMyLID = Mrowmap.MinLID();
01020 
01021   int* procNumCols = &(iwork[iworkOffset]); iworkOffset += numProcs;
01022   int* procNumRows = &(iwork[iworkOffset]); iworkOffset += numProcs;
01023   int* procRows_1D = &(iwork[iworkOffset]);
01024   int** procCols = new int*[numProcs];
01025   int** procRows = new int*[numProcs];
01026   int i, err;
01027   int offset = 0;
01028   for(i=0; i<numProcs; ++i) {
01029     procNumCols[i] = all_proc_cols[offset];
01030     procCols[i] = &(all_proc_cols[offset+1]);
01031     offset += max_num_cols;
01032 
01033     procNumRows[i] = 0;
01034     procRows[i] = &(procRows_1D[i*MnumRows]);
01035   }
01036 
01037   int* Mindices;
01038 
01039   for(int row=0; row<MnumRows; ++row) {
01040     int localRow = MminMyLID+row;
01041     int globalRow = Mrowmap.GID(localRow);
01042     int MnumCols;
01043     err = Mgraph.ExtractMyRowView(localRow, MnumCols, Mindices);
01044     if (err != 0) {
01045       cerr << "proc "<<localProc<<", error in Mgraph.ExtractMyRowView, row "
01046            <<localRow<<endl;
01047       return(NULL);
01048     }
01049 
01050     for(int j=0; j<MnumCols; ++j) {
01051       int colGID = Mcolmap.GID(Mindices[j]);
01052 
01053       for(int p=0; p<numProcs; ++p) {
01054         if (p==localProc) continue;
01055 
01056         int insertPoint;
01057         int foundOffset = Epetra_Util_binary_search(colGID, procCols[p],
01058                                                     procNumCols[p], insertPoint);
01059         if (foundOffset > -1) {
01060           int numRowsP = procNumRows[p];
01061           int* prows = procRows[p];
01062           if (numRowsP < 1 || prows[numRowsP-1] < globalRow) {
01063             prows[numRowsP] = globalRow;
01064             procNumRows[p]++;
01065           }
01066         }
01067       }
01068     }
01069   }
01070 
01071   //Now make the contents of procRows occupy a contiguous section
01072   //of procRows_1D.
01073   offset = procNumRows[0];
01074   for(i=1; i<numProcs; ++i) {
01075     for(int j=0; j<procNumRows[i]; ++j) {
01076       procRows_1D[offset++] = procRows[i][j];
01077     }
01078   }
01079 
01080   int totalNumSend = offset;
01081   //Next we will do a sparse all-to-all communication to send the lists of rows
01082   //to the appropriate processors, and create a map with the rows we've received
01083   //from other processors.
01084   Epetra_Map* recvd_rows =
01085     create_map_from_imported_rows(&Mrowmap, totalNumSend,
01086                                   procRows_1D, numProcs, procNumRows);
01087 
01088   Epetra_Map* result_map = NULL;
01089 
01090   err = form_map_union(&(M.RowMap()), recvd_rows, (const Epetra_Map*&)result_map);
01091   if (err != 0) {
01092     return(NULL);
01093   }
01094 
01095   delete [] iwork;
01096   delete [] procCols;
01097   delete [] procRows;
01098   delete [] all_proc_cols;
01099   delete recvd_rows;
01100 
01101   return(result_map);
01102 }
01103 
01104 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01105          bool transposeA,
01106          const Epetra_CrsMatrix& B,
01107          bool transposeB,
01108          Epetra_CrsMatrix& C,
01109                            bool call_FillComplete_on_result)
01110 {
01111   //
01112   //This method forms the matrix-matrix product C = op(A) * op(B), where
01113   //op(A) == A   if transposeA is false,
01114   //op(A) == A^T if transposeA is true,
01115   //and similarly for op(B).
01116   //
01117 
01118   //A and B should already be Filled.
01119   //(Should we go ahead and call FillComplete() on them if necessary?
01120   // or error out? For now, we choose to error out.)
01121   if (!A.Filled() || !B.Filled()) {
01122     EPETRA_CHK_ERR(-1);
01123   }
01124 
01125   //We're going to refer to the different combinations of op(A) and op(B)
01126   //as scenario 1 through 4.
01127 
01128   int scenario = 1;//A*B
01129   if (transposeB && !transposeA) scenario = 2;//A*B^T
01130   if (transposeA && !transposeB) scenario = 3;//A^T*B
01131   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
01132 
01133   //now check size compatibility
01134   int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows();
01135   int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols();
01136   int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols();
01137   int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows();
01138   if (Ainner != Binner) {
01139     cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01140          << "must match for matrix-matrix product. op(A) is "
01141          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl;
01142     return(-1);
01143   }
01144 
01145   //The result matrix C must at least have a row-map that reflects the
01146   //correct row-size. Don't check the number of columns because rectangular
01147   //matrices which were constructed with only one map can still end up
01148   //having the correct capacity and dimensions when filled.
01149   if (Aouter > C.NumGlobalRows()) {
01150     cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01151          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows()
01152          << " rows, should have at least "<<Aouter << endl;
01153     return(-1);
01154   }
01155 
01156   //It doesn't matter whether C is already Filled or not. If it is already
01157   //Filled, it must have space allocated for the positions that will be
01158   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01159   //we'll error out later when trying to store result values.
01160 
01161   //We're going to need to import remotely-owned sections of A and/or B
01162   //if more than 1 processor is performing this run, depending on the scenario.
01163   int numProcs = A.Comm().NumProc();
01164 
01165   //If we are to use the transpose of A and/or B, we'll need to be able to 
01166   //access, on the local processor, all rows that contain column-indices in
01167   //the domain-map.
01168   const Epetra_Map* domainMap_A = &(A.DomainMap());
01169   const Epetra_Map* domainMap_B = &(B.DomainMap());
01170 
01171   const Epetra_Map* rowmap_A = &(A.RowMap());
01172   const Epetra_Map* rowmap_B = &(B.RowMap());
01173 
01174   //Declare some 'work-space' maps which may be created depending on
01175   //the scenario, and which will be deleted before exiting this function.
01176   const Epetra_Map* workmap1 = NULL;
01177   const Epetra_Map* workmap2 = NULL;
01178   const Epetra_Map* mapunion1 = NULL;
01179 
01180   //Declare a couple of structs that will be used to hold views of the data
01181   //of A and B, to be used for fast access during the matrix-multiplication.
01182   CrsMatrixStruct Aview;
01183   CrsMatrixStruct Bview;
01184 
01185   const Epetra_Map* targetMap_A = rowmap_A;
01186   const Epetra_Map* targetMap_B = rowmap_B;
01187 
01188   if (numProcs > 1) {
01189     //If op(A) = A^T, find all rows of A that contain column-indices in the
01190     //local portion of the domain-map. (We'll import any remote rows
01191     //that fit this criteria onto the local processor.)
01192     if (transposeA) {
01193       workmap1 = find_rows_containing_cols(A, domainMap_A);
01194       targetMap_A = workmap1;
01195     }
01196   }
01197 
01198   //Now import any needed remote rows and populate the Aview struct.
01199   EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) );
01200 
01201   //We will also need local access to all rows of B that correspond to the
01202   //column-map of op(A).
01203   if (numProcs > 1) {
01204     const Epetra_Map* colmap_op_A = NULL;
01205     if (transposeA) {
01206       colmap_op_A = targetMap_A;
01207     }
01208     else {
01209       colmap_op_A = &(A.ColMap());
01210     }
01211 
01212     targetMap_B = colmap_op_A;
01213 
01214     //If op(B) = B^T, find all rows of B that contain column-indices in the
01215     //local-portion of the domain-map, or in the column-map of op(A).
01216     //We'll import any remote rows that fit this criteria onto the
01217     //local processor.
01218     if (transposeB) {
01219       EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) );
01220       workmap2 = find_rows_containing_cols(B, mapunion1);
01221       targetMap_B = workmap2;
01222     }
01223   }
01224 
01225   //Now import any needed remote rows and populate the Bview struct.
01226   EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) );
01227 
01228   //zero the result matrix before we start the calculations.
01229   EPETRA_CHK_ERR( C.PutScalar(0.0) );
01230 
01231 
01232   //Now call the appropriate method to perform the actual multiplication.
01233 
01234   switch(scenario) {
01235   case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, C) );
01236     break;
01237   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, C) );
01238     break;
01239   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, C) );
01240     break;
01241   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, C) );
01242     break;
01243   }
01244 
01245   if (call_FillComplete_on_result) {
01246     //We'll call FillComplete on the C matrix before we exit, and give
01247     //it a domain-map and a range-map.
01248     //The domain-map will be the domain-map of B, unless
01249     //op(B)==transpose(B), in which case the range-map of B will be used.
01250     //The range-map will be the range-map of A, unless
01251     //op(A)==transpose(A), in which case the domain-map of A will be used.
01252 
01253     const Epetra_Map* domainmap =
01254       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01255 
01256     const Epetra_Map* rangemap =
01257       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01258 
01259     if (!C.Filled()) {
01260       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01261     }
01262   }
01263 
01264   //Finally, delete the objects that were potentially created
01265   //during the course of importing remote sections of A and B.
01266 
01267   delete mapunion1; mapunion1 = NULL;
01268   delete workmap1; workmap1 = NULL;
01269   delete workmap2; workmap2 = NULL;
01270 
01271   return(0);
01272 }
01273 
01274 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01275                       bool transposeA,
01276                       double scalarA,
01277                       Epetra_CrsMatrix& B,
01278                       double scalarB )
01279 {
01280   //
01281   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01282 
01283   //A should already be Filled. It doesn't matter whether B is
01284   //already Filled, but if it is, then its graph must already contain
01285   //all nonzero locations that will be referenced in forming the
01286   //sum.
01287 
01288   if (!A.Filled() ) {
01289     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01290     EPETRA_CHK_ERR(-1);
01291   }
01292 
01293   //explicit tranpose A formed as necessary
01294   Epetra_CrsMatrix * Aprime = 0;
01295   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01296   if( transposeA )
01297   {
01298     Atrans = new EpetraExt::RowMatrix_Transpose();
01299     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01300   }
01301   else
01302     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01303 
01304   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01305   int A_NumEntries, B_NumEntries;
01306   int * A_Indices = new int[MaxNumEntries];
01307   double * A_Values = new double[MaxNumEntries];
01308   int* B_Indices;
01309   double* B_Values;
01310 
01311   int NumMyRows = B.NumMyRows();
01312   int Row, err;
01313 
01314   if( scalarA )
01315   {
01316     //Loop over B's rows and sum into
01317     for( int i = 0; i < NumMyRows; ++i )
01318     {
01319       Row = B.GRID(i);
01320       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01321 
01322       if (scalarB != 1.0) {
01323         if (!B.Filled()) {
01324           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01325                                                   B_Values, B_Indices));
01326         }
01327         else {
01328           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01329                                               B_Values, B_Indices));
01330         }
01331 
01332         for(int jj=0; jj<B_NumEntries; ++jj) {
01333           B_Values[jj] = scalarB*B_Values[jj];
01334         }
01335       }
01336 
01337       if( scalarA != 1.0 ) {
01338         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01339       }
01340 
01341       if( B.Filled() ) {//Sum In Values
01342         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01343         assert( err == 0 );
01344       }
01345       else {
01346         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01347         assert( err == 0 || err == 1 || err == 3 );
01348       }
01349     }
01350   }
01351   else {
01352     EPETRA_CHK_ERR( B.Scale(scalarB) );
01353   }
01354 
01355   delete [] A_Indices;
01356   delete [] A_Values;
01357 
01358   if( Atrans ) delete Atrans;
01359 
01360   return(0);
01361 }
01362 
01363 } // namespace EpetraExt
01364 

Generated on Tue Oct 20 12:45:30 2009 for EpetraExt by doxygen 1.4.7