EpetraExt Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 //
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2011) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 
00042 #include <EpetraExt_ConfigDefs.h>
00043 #include <EpetraExt_MatrixMatrix.h>
00044 
00045 #include <EpetraExt_MMHelpers.h>
00046 
00047 #include <EpetraExt_Transpose_RowMatrix.h>
00048 
00049 #include <Epetra_Export.h>
00050 #include <Epetra_Import.h>
00051 #include <Epetra_Util.h>
00052 #include <Epetra_Map.h>
00053 #include <Epetra_Comm.h>
00054 #include <Epetra_CrsMatrix.h>
00055 #include <Epetra_Directory.h>
00056 #include <Epetra_HashTable.h>
00057 #include <Epetra_Distributor.h>
00058 
00059 #ifdef HAVE_VECTOR
00060 #include <vector>
00061 #endif
00062 
00063 namespace EpetraExt {
00064 
00065 //
00066 //Method for internal use... sparsedot forms a dot-product between two
00067 //sparsely-populated 'vectors'.
00068 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00069 //
00070 double sparsedot(double* u, int* u_ind, int u_len,
00071      double* v, int* v_ind, int v_len)
00072 {
00073   double result = 0.0;
00074 
00075   int v_idx = 0;
00076   int u_idx = 0;
00077 
00078   while(v_idx < v_len && u_idx < u_len) {
00079     int ui = u_ind[u_idx];
00080     int vi = v_ind[v_idx];
00081 
00082     if (ui < vi) {
00083       ++u_idx;
00084     }
00085     else if (ui > vi) {
00086       ++v_idx;
00087     }
00088     else {
00089       result += u[u_idx++]*v[v_idx++];
00090     }
00091   }
00092 
00093   return(result);
00094 }
00095 
00096 //kernel method for computing the local portion of C = A*B
00097 int mult_A_B(CrsMatrixStruct& Aview,
00098        CrsMatrixStruct& Bview,
00099        CrsWrapper& C)
00100 {
00101   int C_firstCol = Bview.colMap->MinLID();
00102   int C_lastCol = Bview.colMap->MaxLID();
00103 
00104   int C_firstCol_import = 0;
00105   int C_lastCol_import = -1;
00106 
00107   int* bcols = Bview.colMap->MyGlobalElements();
00108   int* bcols_import = NULL;
00109   if (Bview.importColMap != NULL) {
00110     C_firstCol_import = Bview.importColMap->MinLID();
00111     C_lastCol_import = Bview.importColMap->MaxLID();
00112 
00113     bcols_import = Bview.importColMap->MyGlobalElements();
00114   }
00115 
00116   int C_numCols = C_lastCol - C_firstCol + 1;
00117   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00118 
00119   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00120   double* dwork = new double[C_numCols];
00121   int* iwork = new int[C_numCols];
00122 
00123   double* C_row_i = dwork;
00124   int* C_cols = iwork;
00125 
00126   int C_row_i_length, i, j, k;
00127 
00128   //To form C = A*B we're going to execute this expression:
00129   //
00130   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00131   //
00132   //Our goal, of course, is to navigate the data in A and B once, without
00133   //performing searches for column-indices, etc.
00134 
00135   bool C_filled = C.Filled();
00136 
00137   //loop over the rows of A.
00138   for(i=0; i<Aview.numRows; ++i) {
00139 
00140     //only navigate the local portion of Aview... (It's probable that we
00141     //imported more of A than we need for A*B, because other cases like A^T*B 
00142     //need the extra rows.)
00143     if (Aview.remote[i]) {
00144       continue;
00145     }
00146 
00147     int* Aindices_i = Aview.indices[i];
00148     double* Aval_i  = Aview.values[i];
00149 
00150     int global_row = Aview.rowMap->GID(i);
00151 
00152     //loop across the i-th row of A and for each corresponding row
00153     //in B, loop across colums and accumulate product
00154     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00155     //as we stride across B(k,:) we're calculating updates for row i of the
00156     //result matrix C.
00157 
00158     for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00159       int Ak = Bview.rowMap->LID(Aview.colMap->GID(Aindices_i[k]));
00160       double Aval = Aval_i[k];
00161 
00162       int* Bcol_inds = Bview.indices[Ak];
00163       double* Bvals_k = Bview.values[Ak];
00164 
00165       C_row_i_length = 0;
00166 
00167       if (Bview.remote[Ak]) {
00168   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00169     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00170           C_cols[C_row_i_length++] = bcols_import[Bcol_inds[j]];
00171   }
00172       }
00173       else {
00174   for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00175     C_row_i[C_row_i_length] = Aval*Bvals_k[j];
00176           C_cols[C_row_i_length++] = bcols[Bcol_inds[j]];
00177   }
00178       }
00179 
00180       //
00181       //Now put the C_row_i values into C.
00182       //
00183 
00184       int err = C_filled ?
00185           C.SumIntoGlobalValues(global_row, C_row_i_length, C_row_i, C_cols)
00186           :
00187           C.InsertGlobalValues(global_row, C_row_i_length, C_row_i, C_cols);
00188  
00189       if (err < 0) {
00190         return(err);
00191       }
00192       if (err > 0) {
00193         if (C_filled) {
00194           //C is Filled, and doesn't
00195           //have all the necessary nonzero locations.
00196           return(err);
00197         }
00198       }
00199     }
00200   }
00201 
00202   delete [] dwork;
00203   delete [] iwork;
00204 
00205   return(0);
00206 }
00207 
00208 //kernel method for computing the local portion of C = A*B^T
00209 int mult_A_Btrans(CrsMatrixStruct& Aview,
00210       CrsMatrixStruct& Bview,
00211       CrsWrapper& C)
00212 {
00213   int i, j, k;
00214   int returnValue = 0;
00215 
00216   int maxlen = 0;
00217   for(i=0; i<Aview.numRows; ++i) {
00218     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00219   }
00220   for(i=0; i<Bview.numRows; ++i) {
00221     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00222   }
00223 
00224   //cout << "Aview: " << endl;
00225   //dumpCrsMatrixStruct(Aview);
00226 
00227   //cout << "Bview: " << endl;
00228   //dumpCrsMatrixStruct(Bview);
00229 
00230   int numBcols = Bview.colMap->NumMyElements();
00231   int numBrows = Bview.numRows;
00232 
00233   int iworklen = maxlen*2 + numBcols;
00234   int* iwork = new int[iworklen];
00235 
00236   int* bcols = iwork+maxlen*2;
00237   int* bgids = Bview.colMap->MyGlobalElements();
00238   double* bvals = new double[maxlen*2];
00239   double* avals = bvals+maxlen;
00240 
00241   int max_all_b = Bview.colMap->MaxAllGID();
00242   int min_all_b = Bview.colMap->MinAllGID();
00243 
00244   //bcols will hold the GIDs from B's column-map for fast access
00245   //during the computations below
00246   for(i=0; i<numBcols; ++i) {
00247     int blid = Bview.colMap->LID(bgids[i]);
00248     bcols[blid] = bgids[i];
00249   }
00250 
00251   //next create arrays indicating the first and last column-index in
00252   //each row of B, so that we can know when to skip certain rows below.
00253   //This will provide a large performance gain for banded matrices, and
00254   //a somewhat smaller gain for *most* other matrices.
00255   int* b_firstcol = new int[2*numBrows];
00256   int* b_lastcol = b_firstcol+numBrows;
00257   int temp;
00258   for(i=0; i<numBrows; ++i) {
00259     b_firstcol[i] = max_all_b;
00260     b_lastcol[i] = min_all_b;
00261 
00262     int Blen_i = Bview.numEntriesPerRow[i];
00263     if (Blen_i < 1) continue;
00264     int* Bindices_i = Bview.indices[i];
00265 
00266     if (Bview.remote[i]) {
00267       for(k=0; k<Blen_i; ++k) {
00268         temp = Bview.importColMap->GID(Bindices_i[k]);
00269         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00270         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00271       }
00272     }
00273     else {
00274       for(k=0; k<Blen_i; ++k) {
00275         temp = bcols[Bindices_i[k]];
00276         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00277         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00278       }
00279     }
00280   }
00281 
00282   Epetra_Util util;
00283 
00284   int* Aind = iwork;
00285   int* Bind = iwork+maxlen;
00286 
00287   bool C_filled = C.Filled();
00288 
00289   //To form C = A*B^T, we're going to execute this expression:
00290   //
00291   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00292   //
00293   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00294   //But it requires the use of a 'sparsedot' function (we're simply forming
00295   //dot-products with row A_i and row B_j for all i and j).
00296 
00297   //loop over the rows of A.
00298   for(i=0; i<Aview.numRows; ++i) {
00299     if (Aview.remote[i]) {
00300       continue;
00301     }
00302 
00303     int* Aindices_i = Aview.indices[i];
00304     double* Aval_i  = Aview.values[i];
00305     int A_len_i = Aview.numEntriesPerRow[i];
00306     if (A_len_i < 1) {
00307       continue;
00308     }
00309 
00310     for(k=0; k<A_len_i; ++k) {
00311       Aind[k] = Aview.colMap->GID(Aindices_i[k]);
00312       avals[k] = Aval_i[k];
00313     }
00314 
00315     util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL);
00316 
00317     int mina = Aind[0];
00318     int maxa = Aind[A_len_i-1];
00319 
00320     if (mina > max_all_b || maxa < min_all_b) {
00321       continue;
00322     }
00323 
00324     int global_row = Aview.rowMap->GID(i);
00325 
00326     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00327     for(j=0; j<Bview.numRows; ++j) {
00328       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00329         continue;
00330       }
00331 
00332       int* Bindices_j = Bview.indices[j];
00333       int B_len_j = Bview.numEntriesPerRow[j];
00334       if (B_len_j < 1) {
00335         continue;
00336       }
00337 
00338       int tmp, Blen = 0;
00339 
00340       if (Bview.remote[j]) {
00341         for(k=0; k<B_len_j; ++k) {
00342     tmp = Bview.importColMap->GID(Bindices_j[k]);
00343           if (tmp < mina || tmp > maxa) {
00344             continue;
00345           }
00346 
00347           bvals[Blen] = Bview.values[j][k];
00348           Bind[Blen++] = tmp;
00349   }
00350       }
00351       else {
00352         for(k=0; k<B_len_j; ++k) {
00353     tmp = bcols[Bindices_j[k]];
00354           if (tmp < mina || tmp > maxa) {
00355             continue;
00356           }
00357 
00358           bvals[Blen] = Bview.values[j][k];
00359           Bind[Blen++] = tmp;
00360   }
00361       }
00362 
00363       if (Blen < 1) {
00364         continue;
00365       }
00366 
00367       util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL);
00368 
00369       double C_ij = sparsedot(avals, Aind, A_len_i,
00370             bvals, Bind, Blen);
00371 
00372       if (C_ij == 0.0) {
00373   continue;
00374       }
00375       int global_col = Bview.rowMap->GID(j);
00376 
00377       int err = C_filled ?
00378         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00379         :
00380         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00381 
00382       if (err < 0) {
00383   return(err);
00384       }
00385       if (err > 0) {
00386   if (C_filled) {
00387     //C.Filled()==true, and C doesn't have all the necessary nonzero
00388           //locations, or global_row or global_col is out of range (less
00389           //than 0 or non local).
00390           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00391               << "to insert value in result matrix at position "<<global_row
00392              <<"," <<global_col<<", possibly because result matrix has a "
00393              << "column-map that doesn't include column "<<global_col
00394              <<" on this proc." <<std::endl;
00395     return(err);
00396   }
00397       }
00398     }
00399   }
00400 
00401   delete [] iwork;
00402   delete [] bvals;
00403   delete [] b_firstcol;
00404 
00405   return(returnValue);
00406 }
00407 
00408 //kernel method for computing the local portion of C = A^T*B
00409 int mult_Atrans_B(CrsMatrixStruct& Aview,
00410       CrsMatrixStruct& Bview,
00411       CrsWrapper& C)
00412 {
00413   int C_firstCol = Bview.colMap->MinLID();
00414   int C_lastCol = Bview.colMap->MaxLID();
00415 
00416   int C_firstCol_import = 0;
00417   int C_lastCol_import = -1;
00418 
00419   if (Bview.importColMap != NULL) {
00420     C_firstCol_import = Bview.importColMap->MinLID();
00421     C_lastCol_import = Bview.importColMap->MaxLID();
00422   }
00423 
00424   int C_numCols = C_lastCol - C_firstCol + 1;
00425   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00426 
00427   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00428 
00429   double* C_row_i = new double[C_numCols];
00430   int* C_colInds = new int[C_numCols];
00431 
00432   int i, j, k;
00433 
00434   for(j=0; j<C_numCols; ++j) {
00435     C_row_i[j] = 0.0;
00436     C_colInds[j] = 0;
00437   }
00438 
00439   //To form C = A^T*B, compute a series of outer-product updates.
00440   //
00441   // for (ith column of A^T) { 
00442   //   C_i = outer product of A^T(:,i) and B(i,:)
00443   // Where C_i is the ith matrix update,
00444   //       A^T(:,i) is the ith column of A^T, and
00445   //       B(i,:) is the ith row of B.
00446   //
00447 
00448   //dumpCrsMatrixStruct(Aview);
00449   //dumpCrsMatrixStruct(Bview);
00450   int localProc = Bview.colMap->Comm().MyPID();
00451 
00452   int* Arows = Aview.rowMap->MyGlobalElements();
00453 
00454   bool C_filled = C.Filled();
00455 
00456   //loop over the rows of A (which are the columns of A^T).
00457   for(i=0; i<Aview.numRows; ++i) {
00458 
00459     int* Aindices_i = Aview.indices[i];
00460     double* Aval_i  = Aview.values[i];
00461 
00462     //we'll need to get the row of B corresponding to Arows[i],
00463     //where Arows[i] is the GID of A's ith row.
00464     int Bi = Bview.rowMap->LID(Arows[i]);
00465     if (Bi<0) {
00466       cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00467      <<Arows[i]<<" of matrix B, but doesn't have it."<<endl;
00468       return(-1);
00469     }
00470 
00471     int* Bcol_inds = Bview.indices[Bi];
00472     double* Bvals_i = Bview.values[Bi];
00473 
00474     //for each column-index Aj in the i-th row of A, we'll update
00475     //global-row GID(Aj) of the result matrix C. In that row of C,
00476     //we'll update column-indices given by the column-indices in the
00477     //ith row of B that we're now holding (Bcol_inds).
00478 
00479     //First create a list of GIDs for the column-indices
00480     //that we'll be updating.
00481 
00482     int Blen = Bview.numEntriesPerRow[Bi];
00483     if (Bview.remote[Bi]) {
00484       for(j=0; j<Blen; ++j) {
00485         C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]);
00486       }
00487     }
00488     else {
00489       for(j=0; j<Blen; ++j) {
00490         C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]);
00491       }
00492     }
00493 
00494     //loop across the i-th row of A (column of A^T)
00495     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00496 
00497       int Aj = Aindices_i[j];
00498       double Aval = Aval_i[j];
00499 
00500       int global_row;
00501       if (Aview.remote[i]) {
00502   global_row = Aview.importColMap->GID(Aj);
00503       }
00504       else {
00505   global_row = Aview.colMap->GID(Aj);
00506       }
00507 
00508       if (!C.RowMap().MyGID(global_row)) {
00509         continue;
00510       }
00511 
00512       for(k=0; k<Blen; ++k) {
00513         C_row_i[k] = Aval*Bvals_i[k];
00514       }
00515 
00516       //
00517       //Now add this row-update to C.
00518       //
00519 
00520       int err = C_filled ?
00521         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00522         :
00523         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00524 
00525       if (err < 0) {
00526         return(err);
00527       }
00528       if (err > 0) {
00529         if (C_filled) {
00530           //C is Filled, and doesn't have all the necessary nonzero locations.
00531           return(err);
00532         }
00533       }
00534     }
00535   }
00536 
00537   delete [] C_row_i;
00538   delete [] C_colInds;
00539 
00540   return(0);
00541 }
00542 
00543 //kernel method for computing the local portion of C = A^T*B^T
00544 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00545            CrsMatrixStruct& Bview,
00546            CrsWrapper& C)
00547 {
00548   int C_firstCol = Aview.rowMap->MinLID();
00549   int C_lastCol = Aview.rowMap->MaxLID();
00550 
00551   int C_firstCol_import = 0;
00552   int C_lastCol_import = -1;
00553 
00554   if (Aview.importColMap != NULL) {
00555     C_firstCol_import = Aview.importColMap->MinLID();
00556     C_lastCol_import = Aview.importColMap->MaxLID();
00557   }
00558 
00559   int C_numCols = C_lastCol - C_firstCol + 1;
00560   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00561 
00562   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00563 
00564   double* dwork = new double[C_numCols];
00565   int* iwork = new int[C_numCols];
00566 
00567   double* C_col_j = dwork;
00568   int* C_inds = iwork;
00569 
00570   //cout << "Aview: " << endl;
00571   //dumpCrsMatrixStruct(Aview);
00572 
00573   //cout << "Bview: " << endl;
00574   //dumpCrsMatrixStruct(Bview);
00575 
00576 
00577   int i, j, k;
00578 
00579   for(j=0; j<C_numCols; ++j) {
00580     C_col_j[j] = 0.0;
00581     C_inds[j] = -1;
00582   }
00583 
00584   int* A_col_inds = Aview.colMap->MyGlobalElements();
00585   int* A_col_inds_import = Aview.importColMap ?
00586     Aview.importColMap->MyGlobalElements() : 0;
00587 
00588   const Epetra_Map* Crowmap = &(C.RowMap());
00589 
00590   //To form C = A^T*B^T, we're going to execute this expression:
00591   //
00592   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00593   //
00594   //Our goal, of course, is to navigate the data in A and B once, without
00595   //performing searches for column-indices, etc. In other words, we avoid
00596   //column-wise operations like the plague...
00597 
00598   int* Brows = Bview.rowMap->MyGlobalElements();
00599 
00600   //loop over the rows of B
00601   for(j=0; j<Bview.numRows; ++j) {
00602     int* Bindices_j = Bview.indices[j];
00603     double* Bvals_j = Bview.values[j];
00604 
00605     int global_col = Brows[j];
00606 
00607     //loop across columns in the j-th row of B and for each corresponding
00608     //row in A, loop across columns and accumulate product
00609     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00610     //words, as we stride across B(j,:), we use selected rows in A to
00611     //calculate updates for column j of the result matrix C.
00612 
00613     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00614 
00615       int bk = Bindices_j[k];
00616       double Bval = Bvals_j[k];
00617 
00618       int global_k;
00619       if (Bview.remote[j]) {
00620   global_k = Bview.importColMap->GID(bk);
00621       }
00622       else {
00623   global_k = Bview.colMap->GID(bk);
00624       }
00625 
00626       //get the corresponding row in A
00627       int ak = Aview.rowMap->LID(global_k);
00628       if (ak<0) {
00629   continue;
00630       }
00631 
00632       int* Aindices_k = Aview.indices[ak];
00633       double* Avals_k = Aview.values[ak];
00634 
00635       int C_len = 0;
00636 
00637       if (Aview.remote[ak]) {
00638   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00639     C_col_j[C_len] = Avals_k[i]*Bval;
00640           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00641   }
00642       }
00643       else {
00644   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00645     C_col_j[C_len] = Avals_k[i]*Bval;
00646           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00647   }
00648       }
00649 
00650       //Now loop across the C_col_j values and put non-zeros into C.
00651 
00652       for(i=0; i < C_len ; ++i) {
00653   if (C_col_j[i] == 0.0) continue;
00654 
00655   int global_row = C_inds[i];
00656   if (!Crowmap->MyGID(global_row)) {
00657     continue;
00658   }
00659 
00660   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00661 
00662   if (err < 0) {
00663     return(err);
00664   }
00665   else {
00666           if (err > 0) {
00667       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00668       if (err < 0) {
00669               return(err);
00670             }
00671     }
00672   }
00673       }
00674 
00675     }
00676   }
00677 
00678   delete [] dwork;
00679   delete [] iwork;
00680 
00681   return(0);
00682 }
00683 
00684 int import_and_extract_views(const Epetra_CrsMatrix& M,
00685            const Epetra_Map& targetMap,
00686            CrsMatrixStruct& Mview)
00687 {
00688   //The goal of this method is to populate the 'Mview' struct with views of the
00689   //rows of M, including all rows that correspond to elements in 'targetMap'.
00690   //
00691   //If targetMap includes local elements that correspond to remotely-owned rows
00692   //of M, then those remotely-owned rows will be imported into
00693   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00694 
00695   Mview.deleteContents();
00696 
00697   const Epetra_Map& Mrowmap = M.RowMap();
00698 
00699   int numProcs = Mrowmap.Comm().NumProc();
00700 
00701   Mview.numRows = targetMap.NumMyElements();
00702 
00703   int* Mrows = targetMap.MyGlobalElements();
00704 
00705   if (Mview.numRows > 0) {
00706     Mview.numEntriesPerRow = new int[Mview.numRows];
00707     Mview.indices = new int*[Mview.numRows];
00708     Mview.values = new double*[Mview.numRows];
00709     Mview.remote = new bool[Mview.numRows];
00710   }
00711 
00712   Mview.numRemote = 0;
00713 
00714   int i;
00715   for(i=0; i<Mview.numRows; ++i) {
00716     int mlid = Mrowmap.LID(Mrows[i]);
00717     if (mlid < 0) {
00718       Mview.remote[i] = true;
00719       ++Mview.numRemote;
00720     }
00721     else {
00722       EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i],
00723            Mview.values[i], Mview.indices[i]) );
00724       Mview.remote[i] = false;
00725     }
00726   }
00727 
00728   Mview.origRowMap = &(M.RowMap());
00729   Mview.rowMap = &targetMap;
00730   Mview.colMap = &(M.ColMap());
00731   Mview.domainMap = &(M.DomainMap());
00732   Mview.importColMap = NULL;
00733 
00734   if (numProcs < 2) {
00735     if (Mview.numRemote > 0) {
00736       cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00737      << "attempting to import remote matrix rows."<<endl;
00738       return(-1);
00739     }
00740 
00741     //If only one processor we don't need to import any remote rows, so return.
00742     return(0);
00743   }
00744 
00745   //
00746   //Now we will import the needed remote rows of M, if the global maximum
00747   //value of numRemote is greater than 0.
00748   //
00749 
00750   int globalMaxNumRemote = 0;
00751   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00752 
00753   if (globalMaxNumRemote > 0) {
00754     //Create a map that describes the remote rows of M that we need.
00755 
00756     int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL;
00757     int offset = 0;
00758     for(i=0; i<Mview.numRows; ++i) {
00759       if (Mview.remote[i]) {
00760   MremoteRows[offset++] = Mrows[i];
00761       }
00762     }
00763 
00764     Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows,
00765            Mrowmap.IndexBase(), Mrowmap.Comm());
00766 
00767     //Create an importer with target-map MremoteRowMap and 
00768     //source-map Mrowmap.
00769     Epetra_Import importer(MremoteRowMap, Mrowmap);
00770 
00771     //Now create a new matrix into which we can import the remote rows of M
00772     //that we need.
00773     Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1);
00774 
00775     EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) );
00776 
00777     EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) );
00778 
00779     //Finally, use the freshly imported data to fill in the gaps in our views
00780     //of rows of M.
00781     for(i=0; i<Mview.numRows; ++i) {
00782       if (Mview.remote[i]) {
00783   int importLID = MremoteRowMap.LID(Mrows[i]);
00784   EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID,
00785               Mview.numEntriesPerRow[i],
00786               Mview.values[i],
00787               Mview.indices[i]) );
00788       }
00789     }
00790 
00791     Mview.importColMap = &(Mview.importMatrix->ColMap());
00792 
00793     delete [] MremoteRows;
00794   }
00795 
00796   return(0);
00797 }
00798 
00799 int form_map_union(const Epetra_Map* map1,
00800        const Epetra_Map* map2,
00801        const Epetra_Map*& mapunion)
00802 {
00803   //form the union of two maps
00804 
00805   if (map1 == NULL) {
00806     mapunion = new Epetra_Map(*map2);
00807     return(0);
00808   }
00809 
00810   if (map2 == NULL) {
00811     mapunion = new Epetra_Map(*map1);
00812     return(0);
00813   }
00814 
00815   int map1_len       = map1->NumMyElements();
00816   int* map1_elements = map1->MyGlobalElements();
00817   int map2_len       = map2->NumMyElements();
00818   int* map2_elements = map2->MyGlobalElements();
00819 
00820   int* union_elements = new int[map1_len+map2_len];
00821 
00822   int map1_offset = 0, map2_offset = 0, union_offset = 0;
00823 
00824   while(map1_offset < map1_len && map2_offset < map2_len) {
00825     int map1_elem = map1_elements[map1_offset];
00826     int map2_elem = map2_elements[map2_offset];
00827 
00828     if (map1_elem < map2_elem) {
00829       union_elements[union_offset++] = map1_elem;
00830       ++map1_offset;
00831     }
00832     else if (map1_elem > map2_elem) {
00833       union_elements[union_offset++] = map2_elem;
00834       ++map2_offset;
00835     }
00836     else {
00837       union_elements[union_offset++] = map1_elem;
00838       ++map1_offset;
00839       ++map2_offset;
00840     }
00841   }
00842 
00843   int i;
00844   for(i=map1_offset; i<map1_len; ++i) {
00845     union_elements[union_offset++] = map1_elements[i];
00846   }
00847 
00848   for(i=map2_offset; i<map2_len; ++i) {
00849     union_elements[union_offset++] = map2_elements[i];
00850   }
00851 
00852   mapunion = new Epetra_Map(-1, union_offset, union_elements,
00853           map1->IndexBase(), map1->Comm());
00854 
00855   delete [] union_elements;
00856 
00857   return(0);
00858 }
00859 
00860 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
00861                                       const Epetra_Map& column_map)
00862 {
00863   //The goal of this function is to find all rows in the matrix M that contain
00864   //column-indices which are in 'column_map'. A map containing those rows is
00865   //returned. (The returned map will then be used as the source row-map for
00866   //importing those rows into an overlapping distribution.)
00867 
00868   std::pair<int,int> my_col_range = get_col_range(column_map);
00869 
00870   const Epetra_Comm& Comm = M.RowMap().Comm();
00871   int num_procs = Comm.NumProc();
00872   int my_proc = Comm.MyPID();
00873   std::vector<int> send_procs;
00874   send_procs.reserve(num_procs-1);
00875   std::vector<int> col_ranges;
00876   col_ranges.reserve(2*(num_procs-1));
00877   for(int p=0; p<num_procs; ++p) {
00878     if (p == my_proc) continue;
00879     send_procs.push_back(p);
00880     col_ranges.push_back(my_col_range.first);
00881     col_ranges.push_back(my_col_range.second);
00882   }
00883 
00884   Epetra_Distributor* distor = Comm.CreateDistributor();
00885 
00886   int num_recv_procs = 0;
00887   int num_send_procs = send_procs.size();
00888   bool deterministic = true;
00889   int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL;
00890   distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs);
00891 
00892   int len_import_chars = 0;
00893   char* import_chars = NULL;
00894 
00895   char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL;
00896   int err = distor->Do(export_chars, 2*sizeof(int), len_import_chars, import_chars);
00897   if (err != 0) {
00898     std::cout << "ERROR returned from Distributor::Do."<<std::endl;
00899   }
00900  
00901   int* IntImports = reinterpret_cast<int*>(import_chars);
00902   int num_import_pairs = len_import_chars/(2*sizeof(int));
00903   int offset = 0;
00904   std::vector<int> send_procs2;
00905   send_procs2.reserve(num_procs);
00906   std::vector<int> proc_col_ranges;
00907   std::pair<int,int> M_col_range = get_col_range(M);
00908   for(int i=0; i<num_import_pairs; ++i) {
00909     int first_col = IntImports[offset++];
00910     int last_col =  IntImports[offset++];
00911     if (first_col <= M_col_range.second && last_col >= M_col_range.first) {
00912       send_procs2.push_back(send_procs[i]);
00913       proc_col_ranges.push_back(first_col);
00914       proc_col_ranges.push_back(last_col);
00915     }
00916   }
00917 
00918   std::vector<int> send_rows;
00919   std::vector<int> rows_per_send_proc;
00920   pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc);
00921 
00922   Epetra_Distributor* distor2 = Comm.CreateDistributor();
00923 
00924   int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL;
00925   num_recv_procs = 0;
00926   err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs);
00927 
00928   export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL;
00929   int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL;
00930   len_import_chars = 0;
00931   err = distor2->Do(export_chars, (int)sizeof(int), rows_per_send_proc_ptr, len_import_chars, import_chars);
00932 
00933   int* recvd_row_ints = reinterpret_cast<int*>(import_chars);
00934   int num_recvd_rows = len_import_chars/sizeof(int);
00935 
00936   Epetra_Map recvd_rows(-1, num_recvd_rows, recvd_row_ints, column_map.IndexBase(), Comm);
00937 
00938   delete distor;
00939   delete distor2;
00940   delete [] import_chars;
00941 
00942   Epetra_Map* result_map = NULL;
00943 
00944   err = form_map_union(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map);
00945   if (err != 0) {
00946     return(NULL);
00947   }
00948 
00949   return(result_map);
00950 }
00951 
00952 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
00953          bool transposeA,
00954          const Epetra_CrsMatrix& B,
00955          bool transposeB,
00956          Epetra_CrsMatrix& C,
00957                            bool call_FillComplete_on_result)
00958 {
00959   //
00960   //This method forms the matrix-matrix product C = op(A) * op(B), where
00961   //op(A) == A   if transposeA is false,
00962   //op(A) == A^T if transposeA is true,
00963   //and similarly for op(B).
00964   //
00965 
00966   //A and B should already be Filled.
00967   //(Should we go ahead and call FillComplete() on them if necessary?
00968   // or error out? For now, we choose to error out.)
00969   if (!A.Filled() || !B.Filled()) {
00970     EPETRA_CHK_ERR(-1);
00971   }
00972 
00973   //We're going to refer to the different combinations of op(A) and op(B)
00974   //as scenario 1 through 4.
00975 
00976   int scenario = 1;//A*B
00977   if (transposeB && !transposeA) scenario = 2;//A*B^T
00978   if (transposeA && !transposeB) scenario = 3;//A^T*B
00979   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
00980 
00981   //now check size compatibility
00982   int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows();
00983   int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols();
00984   int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols();
00985   int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows();
00986   if (Ainner != Binner) {
00987     cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
00988          << "must match for matrix-matrix product. op(A) is "
00989          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl;
00990     return(-1);
00991   }
00992 
00993   //The result matrix C must at least have a row-map that reflects the
00994   //correct row-size. Don't check the number of columns because rectangular
00995   //matrices which were constructed with only one map can still end up
00996   //having the correct capacity and dimensions when filled.
00997   if (Aouter > C.NumGlobalRows()) {
00998     cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
00999          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows()
01000          << " rows, should have at least "<<Aouter << endl;
01001     return(-1);
01002   }
01003 
01004   //It doesn't matter whether C is already Filled or not. If it is already
01005   //Filled, it must have space allocated for the positions that will be
01006   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01007   //we'll error out later when trying to store result values.
01008 
01009   //We're going to need to import remotely-owned sections of A and/or B
01010   //if more than 1 processor is performing this run, depending on the scenario.
01011   int numProcs = A.Comm().NumProc();
01012 
01013   //If we are to use the transpose of A and/or B, we'll need to be able to 
01014   //access, on the local processor, all rows that contain column-indices in
01015   //the domain-map.
01016   const Epetra_Map* domainMap_A = &(A.DomainMap());
01017   const Epetra_Map* domainMap_B = &(B.DomainMap());
01018 
01019   const Epetra_Map* rowmap_A = &(A.RowMap());
01020   const Epetra_Map* rowmap_B = &(B.RowMap());
01021 
01022   //Declare some 'work-space' maps which may be created depending on
01023   //the scenario, and which will be deleted before exiting this function.
01024   const Epetra_Map* workmap1 = NULL;
01025   const Epetra_Map* workmap2 = NULL;
01026   const Epetra_Map* mapunion1 = NULL;
01027 
01028   //Declare a couple of structs that will be used to hold views of the data
01029   //of A and B, to be used for fast access during the matrix-multiplication.
01030   CrsMatrixStruct Aview;
01031   CrsMatrixStruct Bview;
01032 
01033   const Epetra_Map* targetMap_A = rowmap_A;
01034   const Epetra_Map* targetMap_B = rowmap_B;
01035 
01036   if (numProcs > 1) {
01037     //If op(A) = A^T, find all rows of A that contain column-indices in the
01038     //local portion of the domain-map. (We'll import any remote rows
01039     //that fit this criteria onto the local processor.)
01040     if (transposeA) {
01041       workmap1 = find_rows_containing_cols(A, *domainMap_A);
01042       targetMap_A = workmap1;
01043     }
01044   }
01045 
01046   //Now import any needed remote rows and populate the Aview struct.
01047   EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) );
01048 
01049   //We will also need local access to all rows of B that correspond to the
01050   //column-map of op(A).
01051   if (numProcs > 1) {
01052     const Epetra_Map* colmap_op_A = NULL;
01053     if (transposeA) {
01054       colmap_op_A = targetMap_A;
01055     }
01056     else {
01057       colmap_op_A = &(A.ColMap());
01058     }
01059 
01060     targetMap_B = colmap_op_A;
01061 
01062     //If op(B) = B^T, find all rows of B that contain column-indices in the
01063     //local-portion of the domain-map, or in the column-map of op(A).
01064     //We'll import any remote rows that fit this criteria onto the
01065     //local processor.
01066     if (transposeB) {
01067       EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) );
01068       workmap2 = find_rows_containing_cols(B, *mapunion1);
01069       targetMap_B = workmap2;
01070     }
01071   }
01072 
01073   //Now import any needed remote rows and populate the Bview struct.
01074   EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) );
01075 
01076   //If the result matrix C is not already FillComplete'd, we will do a
01077   //preprocessing step to create the nonzero structure, then call FillComplete,
01078   if (!C.Filled()) {
01079     CrsWrapper_GraphBuilder crsgraphbuilder(C.RowMap());
01080 
01081     //pass the graph-builder object to the multiplication kernel to fill in all
01082     //the nonzero positions that will be used in the result matrix.
01083     switch(scenario) {
01084     case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, crsgraphbuilder) );
01085       break;
01086     case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, crsgraphbuilder) );
01087       break;
01088     case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, crsgraphbuilder) );
01089       break;
01090     case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, crsgraphbuilder) );
01091       break;
01092     }
01093 
01094     //now insert all of the nonzero positions into the result matrix.
01095     insert_matrix_locations(crsgraphbuilder, C);
01096 
01097     if (call_FillComplete_on_result) {
01098       const Epetra_Map* domainmap =
01099         transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01100 
01101       const Epetra_Map* rangemap =
01102         transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01103 
01104       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01105       call_FillComplete_on_result = false;
01106     }
01107   }
01108 
01109   //Pre-zero the result matrix:
01110   C.PutScalar(0.0);
01111 
01112   //Now call the appropriate method to perform the actual multiplication.
01113 
01114   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01115 
01116   switch(scenario) {
01117   case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, ecrsmat) );
01118     break;
01119   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
01120     break;
01121   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
01122     break;
01123   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
01124     break;
01125   }
01126 
01127   if (call_FillComplete_on_result) {
01128     //We'll call FillComplete on the C matrix before we exit, and give
01129     //it a domain-map and a range-map.
01130     //The domain-map will be the domain-map of B, unless
01131     //op(B)==transpose(B), in which case the range-map of B will be used.
01132     //The range-map will be the range-map of A, unless
01133     //op(A)==transpose(A), in which case the domain-map of A will be used.
01134 
01135     const Epetra_Map* domainmap =
01136       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01137 
01138     const Epetra_Map* rangemap =
01139       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01140 
01141     if (!C.Filled()) {
01142       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01143     }
01144   }
01145 
01146   //Finally, delete the objects that were potentially created
01147   //during the course of importing remote sections of A and B.
01148 
01149   delete mapunion1; mapunion1 = NULL;
01150   delete workmap1; workmap1 = NULL;
01151   delete workmap2; workmap2 = NULL;
01152 
01153   return(0);
01154 }
01155 
01156 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01157                       bool transposeA,
01158                       double scalarA,
01159                       Epetra_CrsMatrix& B,
01160                       double scalarB )
01161 {
01162   //
01163   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01164 
01165   //A should already be Filled. It doesn't matter whether B is
01166   //already Filled, but if it is, then its graph must already contain
01167   //all nonzero locations that will be referenced in forming the
01168   //sum.
01169 
01170   if (!A.Filled() ) {
01171     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01172     EPETRA_CHK_ERR(-1);
01173   }
01174 
01175   //explicit tranpose A formed as necessary
01176   Epetra_CrsMatrix * Aprime = 0;
01177   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01178   if( transposeA )
01179   {
01180     Atrans = new EpetraExt::RowMatrix_Transpose();
01181     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01182   }
01183   else
01184     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01185 
01186   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01187   int A_NumEntries, B_NumEntries;
01188   int * A_Indices = new int[MaxNumEntries];
01189   double * A_Values = new double[MaxNumEntries];
01190   int* B_Indices;
01191   double* B_Values;
01192 
01193   int NumMyRows = B.NumMyRows();
01194   int Row, err;
01195   int ierr = 0;
01196 
01197   if( scalarA )
01198   {
01199     //Loop over B's rows and sum into
01200     for( int i = 0; i < NumMyRows; ++i )
01201     {
01202       Row = B.GRID(i);
01203       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01204 
01205       if (scalarB != 1.0) {
01206         if (!B.Filled()) {
01207           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01208                                                   B_Values, B_Indices));
01209         }
01210         else {
01211           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01212                                               B_Values, B_Indices));
01213         }
01214 
01215         for(int jj=0; jj<B_NumEntries; ++jj) {
01216           B_Values[jj] = scalarB*B_Values[jj];
01217         }
01218       }
01219 
01220       if( scalarA != 1.0 ) {
01221         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01222       }
01223 
01224       if( B.Filled() ) {//Sum In Values
01225         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01226         assert( err >= 0 );
01227         if (err < 0) ierr = err;
01228       }
01229       else {
01230         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01231         assert( err == 0 || err == 1 || err == 3 );
01232         if (err < 0) ierr = err;
01233       }
01234     }
01235   }
01236   else {
01237     EPETRA_CHK_ERR( B.Scale(scalarB) );
01238   }
01239 
01240   delete [] A_Indices;
01241   delete [] A_Values;
01242 
01243   if( Atrans ) delete Atrans;
01244 
01245   return(ierr);
01246 }
01247 
01248 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01249                       bool transposeA,
01250                       double scalarA,
01251                       const Epetra_CrsMatrix & B,
01252                       bool transposeB,
01253                       double scalarB,
01254                       Epetra_CrsMatrix * & C)
01255 {
01256   //
01257   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
01258 
01259   //A and B should already be Filled. C should be an empty pointer.
01260 
01261   if (!A.Filled() || !B.Filled() ) {
01262      std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
01263                << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
01264      EPETRA_CHK_ERR(-1);
01265   }
01266 
01267   Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
01268   EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
01269 
01270   //explicit tranpose A formed as necessary
01271   if( transposeA ) {
01272      Atrans = new EpetraExt::RowMatrix_Transpose();
01273      Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01274   }
01275   else
01276      Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01277 
01278   //explicit tranpose B formed as necessary
01279   if( transposeB ) {
01280      Btrans = new EpetraExt::RowMatrix_Transpose();
01281      Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
01282   }
01283   else
01284      Bprime = const_cast<Epetra_CrsMatrix*>(&B);
01285 
01286   // allocate or zero the new matrix
01287   if(C!=0)
01288      C->PutScalar(0.0);
01289   else
01290      C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
01291 
01292   // build arrays  for easy resuse
01293   int ierr = 0;
01294   Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
01295   double scalar[] = { scalarA, scalarB};
01296 
01297   // do a loop over each matrix to add: A reordering might be more efficient
01298   for(int k=0;k<2;k++) {
01299      int MaxNumEntries = Mat[k]->MaxNumEntries();
01300      int NumEntries;
01301      int * Indices = new int[MaxNumEntries];
01302      double * Values = new double[MaxNumEntries];
01303    
01304      int NumMyRows = Mat[k]->NumMyRows();
01305      int Row, err;
01306      int ierr = 0;
01307    
01308      //Loop over rows and sum into C
01309      for( int i = 0; i < NumMyRows; ++i ) {
01310         Row = Mat[k]->GRID(i);
01311         EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
01312    
01313         if( scalar[k] != 1.0 )
01314            for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
01315    
01316         if(C->Filled()) { // Sum in values
01317            err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
01318            if (err < 0) ierr = err;
01319         } else { // just add it to the unfilled CRS Matrix
01320            err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
01321            if (err < 0) ierr = err;
01322         }
01323      }
01324 
01325      delete [] Indices;
01326      delete [] Values;
01327   }
01328 
01329   if( Atrans ) delete Atrans;
01330   if( Btrans ) delete Btrans;
01331 
01332   return(ierr);
01333 }
01334 
01335 } // namespace EpetraExt
01336 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines