EpetraExt Package Browser (Single Doxygen Collection) Development
EpetraExt_MatrixMatrix.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 //
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2011) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 
00042 #include <EpetraExt_ConfigDefs.h>
00043 #include <EpetraExt_MatrixMatrix.h>
00044 
00045 #include <EpetraExt_MMHelpers.h>
00046 
00047 #include <EpetraExt_Transpose_RowMatrix.h>
00048 
00049 #include <Epetra_Export.h>
00050 #include <Epetra_Import.h>
00051 #include <Epetra_Util.h>
00052 #include <Epetra_Map.h>
00053 #include <Epetra_Comm.h>
00054 #include <Epetra_CrsMatrix.h>
00055 #include <Epetra_Directory.h>
00056 #include <Epetra_HashTable.h>
00057 #include <Epetra_Distributor.h>
00058 
00059 #ifdef HAVE_VECTOR
00060 #include <vector>
00061 #endif
00062 
00063 namespace EpetraExt {
00064 
00065 //
00066 //Method for internal use... sparsedot forms a dot-product between two
00067 //sparsely-populated 'vectors'.
00068 //Important assumption: assumes the indices in u_ind and v_ind are sorted.
00069 //
00070 double sparsedot(double* u, int* u_ind, int u_len,
00071      double* v, int* v_ind, int v_len)
00072 {
00073   double result = 0.0;
00074 
00075   int v_idx = 0;
00076   int u_idx = 0;
00077 
00078   while(v_idx < v_len && u_idx < u_len) {
00079     int ui = u_ind[u_idx];
00080     int vi = v_ind[v_idx];
00081 
00082     if (ui < vi) {
00083       ++u_idx;
00084     }
00085     else if (ui > vi) {
00086       ++v_idx;
00087     }
00088     else {
00089       result += u[u_idx++]*v[v_idx++];
00090     }
00091   }
00092 
00093   return(result);
00094 }
00095 
00096 
00097 
00098 
00099 //kernel method for computing the local portion of C = A*B
00100 int mult_A_B(CrsMatrixStruct& Aview,
00101        CrsMatrixStruct& Bview,
00102        CrsWrapper& C)
00103 {
00104   int C_firstCol = Bview.colMap->MinLID();
00105   int C_lastCol = Bview.colMap->MaxLID();
00106 
00107   int C_firstCol_import = 0;
00108   int C_lastCol_import = -1;
00109 
00110   int* bcols = Bview.colMap->MyGlobalElements();
00111   int* bcols_import = NULL;
00112   if (Bview.importColMap != NULL) {
00113     C_firstCol_import = Bview.importColMap->MinLID();
00114     C_lastCol_import = Bview.importColMap->MaxLID();
00115 
00116     bcols_import = Bview.importColMap->MyGlobalElements();
00117   }
00118 
00119   int C_numCols = C_lastCol - C_firstCol + 1;
00120   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00121 
00122   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00123 
00124   // Allocate workspace memory
00125   double* dwork = new double[C_numCols];
00126   int* iwork = new int[C_numCols];
00127   int *c_cols=iwork;
00128   double *c_vals=dwork;
00129   int *c_index=new int[C_numCols];
00130 
00131   int  i, j, k;
00132   bool C_filled=C.Filled();
00133 
00134 
00135   //To form C = A*B we're going to execute this expression:
00136   //
00137   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00138   //
00139   //Our goal, of course, is to navigate the data in A and B once, without
00140   //performing searches for column-indices, etc.
00141 
00142 
00143   // Run through all the hash table lookups once and for all
00144   int * Acol2Brow=new int[Aview.colMap->NumMyElements()];
00145   if(Aview.colMap->SameAs(*Bview.rowMap)){
00146     // Maps are the same: Use local IDs as the hash
00147     for(int i=0;i<Aview.colMap->NumMyElements();i++)
00148       Acol2Brow[i]=i;       
00149   }
00150   else {
00151     // Maps are not the same:  Use the map's hash
00152     for(int i=0;i<Aview.colMap->NumMyElements();i++)
00153       Acol2Brow[i]=Bview.rowMap->LID(Aview.colMap->GID(i));
00154   }
00155   
00156   // Mark indices as empty w/ -1
00157   for(k=0;k<C_numCols;k++) c_index[k]=-1;
00158 
00159   //loop over the rows of A.
00160   for(i=0; i<Aview.numRows; ++i) {
00161     //only navigate the local portion of Aview... (It's probable that we
00162     //imported more of A than we need for A*B, because other cases like A^T*B 
00163     //need the extra rows.)
00164     if (Aview.remote[i]) {
00165       continue;
00166     }
00167 
00168     int* Aindices_i = Aview.indices[i];
00169     double* Aval_i  = Aview.values[i];
00170 
00171     int global_row = Aview.rowMap->GID(i);
00172 
00173     //loop across the i-th row of A and for each corresponding row
00174     //in B, loop across colums and accumulate product
00175     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00176     //as we stride across B(k,:) we're calculating updates for row i of the
00177     //result matrix C.
00178 
00179     /* Outline of the revised, ML-inspired algorithm
00180  
00181     C_{i,j} = \sum_k A_{i,k} B_{k,j}
00182 
00183     This algorithm uses a "middle product" formulation, with the loop ordering of 
00184     i, k, j.  This means we compute a row of C at a time, but compute partial sums of 
00185     each entry in row i until we finish the k loop.
00186 
00187     This algorithm also has a few twists worth documenting.  
00188 
00189     1) First off, we compute the Acol2Brow array above.  This array answers the question:
00190 
00191     "Given a LCID in A, what LRID in B corresponds to it?"
00192 
00193     Since this involves LID calls, this is a hash table lookup.  Thus we do this once for every 
00194     LCID in A, rather than call LID inside the MMM loop.
00195 
00196     2) The second major twist involves the c_index, c_cols and c_vals arrays.  The arrays c_cols 
00197     and c_vals store the *local* column index and values accumulator respectively.  These 
00198     arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols.
00199     The value c_current tells us how many non-zeros we currently have in this row.
00200     
00201     So how do we take a LCID and find the right accumulator?  This is where the c_index array
00202     comes in.  At the start (and stop) and the i loop, c_index is filled with -1's.  Now 
00203     whenever we find a LCID in the k loop, we first loop at c_index[lcid].  If this value is
00204     -1 we haven't seen this entry yet.  In which case we add the appropriate stuff to c_cols
00205     and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before
00206     we increment it).  If the value is NOT -1, this tells us the location in the c_vals/c_cols
00207     arrays (namely c_index[lcid]) where our accumulator lives.
00208 
00209     This trick works because we're working with local ids.  We can then loop from 0 to c_current
00210     and reset c_index to -1's when we're done, only touching the arrays that have changed.
00211     While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues.
00212     Thus, the effect of this trick is to avoid passes over the index array.
00213 
00214     3) The third major twist involves handling the remote and local components of B separately.
00215     (ML doesn't need to do this, because its local ordering scheme is consistent between the local
00216     and imported components of B.)  Since they have different column maps, they have inconsistent 
00217     local column ids.  This means the "second twist" won't work as stated on both matrices at the 
00218     same time.  While this could be handled any number of ways, I have chosen to do the two parts 
00219     of B separately to make the code easier to read (and reduce the memory footprint of the MMM).
00220     */
00221 
00222     // Local matrix: Zero Current counts for matrix
00223     int c_current=0;    
00224 
00225     // Local matrix: Do the "middle product"
00226     for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00227       int Ak=Acol2Brow[Aindices_i[k]];
00228       double Aval = Aval_i[k];
00229       // We're skipping remote entries on this pass.
00230       if(Bview.remote[Ak] || Aval==0) continue;
00231       
00232       int* Bcol_inds = Bview.indices[Ak];
00233       double* Bvals_k = Bview.values[Ak];
00234 
00235       for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00236   int col=Bcol_inds[j];
00237   if(c_index[col]<0){
00238     // We haven't seen this entry before; add it.  (In ML, on
00239     // the first pass, you haven't seen any of the entries
00240     // before, so they are added without the check.  Not sure
00241     // how much more efficient that would be; depends on branch
00242     // prediction.  We've favored code readability here.)
00243     c_cols[c_current]=col;        
00244     c_vals[c_current]=Aval*Bvals_k[j];
00245     c_index[col]=c_current;
00246     c_current++;
00247   }
00248   else{ 
00249     // We've already seen this entry; accumulate it.
00250     c_vals[c_index[col]]+=Aval*Bvals_k[j];      
00251   }
00252       }
00253     }    
00254     // Local matrix: Reset c_index and switch c_cols to GIDs
00255     for(k=0; k<c_current; k++){
00256       c_index[c_cols[k]]=-1;
00257       c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs.     
00258     }
00259     // Local matrix: Insert.
00260     //
00261     // We should check global error results after the algorithm is
00262     // through.  It's probably safer just to let the algorithm run all
00263     // the way through before doing this, since otherwise we have to
00264     // remember to free all allocations carefully.
00265     int err = C_filled ?
00266       C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
00267       :
00268       C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);   
00269 
00270     // Remote matrix: Zero current counts again for matrix
00271     c_current=0;    
00272 
00273     // Remote matrix: Do the "middle product"
00274     for(k=0; k<Aview.numEntriesPerRow[i]; ++k) {
00275       int Ak=Acol2Brow[Aindices_i[k]];
00276       double Aval = Aval_i[k];
00277       // We're skipping local entries on this pass.
00278       if(!Bview.remote[Ak] || Aval==0) continue;
00279       
00280       int* Bcol_inds = Bview.indices[Ak];
00281       double* Bvals_k = Bview.values[Ak];
00282 
00283       for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00284   int col=Bcol_inds[j];
00285   if(c_index[col]<0){
00286     c_cols[c_current]=col;        
00287     c_vals[c_current]=Aval*Bvals_k[j];
00288     c_index[col]=c_current;
00289     c_current++;
00290   }
00291   else{
00292     c_vals[c_index[col]]+=Aval*Bvals_k[j];      
00293   }
00294       }
00295     }    
00296     // Remote matrix: Reset c_index and switch c_cols to GIDs
00297     for(k=0; k<c_current; k++){
00298       c_index[c_cols[k]]=-1;
00299       c_cols[k]=bcols_import[c_cols[k]];      
00300     }
00301     // Remove matrix: Insert
00302     //
00303     // See above (on error handling).
00304     err = C_filled ?
00305       C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
00306       :
00307       C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);    
00308   }
00309   
00310 
00311   delete [] dwork;
00312   delete [] iwork;
00313   delete [] c_index;
00314   delete [] Acol2Brow;
00315   return(0);
00316 }
00317 
00318 //kernel method for computing the local portion of C = A*B^T
00319 int mult_A_Btrans(CrsMatrixStruct& Aview,
00320       CrsMatrixStruct& Bview,
00321       CrsWrapper& C)
00322 {
00323   int i, j, k;
00324   int returnValue = 0;
00325 
00326   int maxlen = 0;
00327   for(i=0; i<Aview.numRows; ++i) {
00328     if (Aview.numEntriesPerRow[i] > maxlen) maxlen = Aview.numEntriesPerRow[i];
00329   }
00330   for(i=0; i<Bview.numRows; ++i) {
00331     if (Bview.numEntriesPerRow[i] > maxlen) maxlen = Bview.numEntriesPerRow[i];
00332   }
00333 
00334   //cout << "Aview: " << endl;
00335   //dumpCrsMatrixStruct(Aview);
00336 
00337   //cout << "Bview: " << endl;
00338   //dumpCrsMatrixStruct(Bview);
00339 
00340   int numBcols = Bview.colMap->NumMyElements();
00341   int numBrows = Bview.numRows;
00342 
00343   int iworklen = maxlen*2 + numBcols;
00344   int* iwork = new int[iworklen];
00345 
00346   int* bcols = iwork+maxlen*2;
00347   int* bgids = Bview.colMap->MyGlobalElements();
00348   double* bvals = new double[maxlen*2];
00349   double* avals = bvals+maxlen;
00350 
00351   int max_all_b = Bview.colMap->MaxAllGID();
00352   int min_all_b = Bview.colMap->MinAllGID();
00353 
00354   //bcols will hold the GIDs from B's column-map for fast access
00355   //during the computations below
00356   for(i=0; i<numBcols; ++i) {
00357     int blid = Bview.colMap->LID(bgids[i]);
00358     bcols[blid] = bgids[i];
00359   }
00360 
00361   //next create arrays indicating the first and last column-index in
00362   //each row of B, so that we can know when to skip certain rows below.
00363   //This will provide a large performance gain for banded matrices, and
00364   //a somewhat smaller gain for *most* other matrices.
00365   int* b_firstcol = new int[2*numBrows];
00366   int* b_lastcol = b_firstcol+numBrows;
00367   int temp;
00368   for(i=0; i<numBrows; ++i) {
00369     b_firstcol[i] = max_all_b;
00370     b_lastcol[i] = min_all_b;
00371 
00372     int Blen_i = Bview.numEntriesPerRow[i];
00373     if (Blen_i < 1) continue;
00374     int* Bindices_i = Bview.indices[i];
00375 
00376     if (Bview.remote[i]) {
00377       for(k=0; k<Blen_i; ++k) {
00378         temp = Bview.importColMap->GID(Bindices_i[k]);
00379         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00380         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00381       }
00382     }
00383     else {
00384       for(k=0; k<Blen_i; ++k) {
00385         temp = bcols[Bindices_i[k]];
00386         if (temp < b_firstcol[i]) b_firstcol[i] = temp;
00387         if (temp > b_lastcol[i]) b_lastcol[i] = temp;
00388       }
00389     }
00390   }
00391 
00392   Epetra_Util util;
00393 
00394   int* Aind = iwork;
00395   int* Bind = iwork+maxlen;
00396 
00397   bool C_filled = C.Filled();
00398 
00399   //To form C = A*B^T, we're going to execute this expression:
00400   //
00401   // C(i,j) = sum_k( A(i,k)*B(j,k) )
00402   //
00403   //This is the easiest case of all to code (easier than A*B, A^T*B, A^T*B^T).
00404   //But it requires the use of a 'sparsedot' function (we're simply forming
00405   //dot-products with row A_i and row B_j for all i and j).
00406 
00407   //loop over the rows of A.
00408   for(i=0; i<Aview.numRows; ++i) {
00409     if (Aview.remote[i]) {
00410       continue;
00411     }
00412 
00413     int* Aindices_i = Aview.indices[i];
00414     double* Aval_i  = Aview.values[i];
00415     int A_len_i = Aview.numEntriesPerRow[i];
00416     if (A_len_i < 1) {
00417       continue;
00418     }
00419 
00420     for(k=0; k<A_len_i; ++k) {
00421       Aind[k] = Aview.colMap->GID(Aindices_i[k]);
00422       avals[k] = Aval_i[k];
00423     }
00424 
00425     util.Sort(true, A_len_i, Aind, 1, &avals, 0, NULL);
00426 
00427     int mina = Aind[0];
00428     int maxa = Aind[A_len_i-1];
00429 
00430     if (mina > max_all_b || maxa < min_all_b) {
00431       continue;
00432     }
00433 
00434     int global_row = Aview.rowMap->GID(i);
00435 
00436     //loop over the rows of B and form results C_ij = dot(A(i,:),B(j,:))
00437     for(j=0; j<Bview.numRows; ++j) {
00438       if (b_firstcol[j] > maxa || b_lastcol[j] < mina) {
00439         continue;
00440       }
00441 
00442       int* Bindices_j = Bview.indices[j];
00443       int B_len_j = Bview.numEntriesPerRow[j];
00444       if (B_len_j < 1) {
00445         continue;
00446       }
00447 
00448       int tmp, Blen = 0;
00449 
00450       if (Bview.remote[j]) {
00451         for(k=0; k<B_len_j; ++k) {
00452     tmp = Bview.importColMap->GID(Bindices_j[k]);
00453           if (tmp < mina || tmp > maxa) {
00454             continue;
00455           }
00456 
00457           bvals[Blen] = Bview.values[j][k];
00458           Bind[Blen++] = tmp;
00459   }
00460       }
00461       else {
00462         for(k=0; k<B_len_j; ++k) {
00463     tmp = bcols[Bindices_j[k]];
00464           if (tmp < mina || tmp > maxa) {
00465             continue;
00466           }
00467 
00468           bvals[Blen] = Bview.values[j][k];
00469           Bind[Blen++] = tmp;
00470   }
00471       }
00472 
00473       if (Blen < 1) {
00474         continue;
00475       }
00476 
00477       util.Sort(true, Blen, Bind, 1, &bvals, 0, NULL);
00478 
00479       double C_ij = sparsedot(avals, Aind, A_len_i,
00480             bvals, Bind, Blen);
00481 
00482       if (C_ij == 0.0) {
00483   continue;
00484       }
00485       int global_col = Bview.rowMap->GID(j);
00486 
00487       int err = C_filled ?
00488         C.SumIntoGlobalValues(global_row, 1, &C_ij, &global_col)
00489         :
00490         C.InsertGlobalValues(global_row, 1, &C_ij, &global_col);
00491 
00492       if (err < 0) {
00493   return(err);
00494       }
00495       if (err > 0) {
00496   if (C_filled) {
00497     //C.Filled()==true, and C doesn't have all the necessary nonzero
00498           //locations, or global_row or global_col is out of range (less
00499           //than 0 or non local).
00500           std::cerr << "EpetraExt::MatrixMatrix::Multiply Warning: failed "
00501               << "to insert value in result matrix at position "<<global_row
00502              <<"," <<global_col<<", possibly because result matrix has a "
00503              << "column-map that doesn't include column "<<global_col
00504              <<" on this proc." <<std::endl;
00505     return(err);
00506   }
00507       }
00508     }
00509   }
00510 
00511   delete [] iwork;
00512   delete [] bvals;
00513   delete [] b_firstcol;
00514 
00515   return(returnValue);
00516 }
00517 
00518 //kernel method for computing the local portion of C = A^T*B
00519 int mult_Atrans_B(CrsMatrixStruct& Aview,
00520       CrsMatrixStruct& Bview,
00521       CrsWrapper& C)
00522 {
00523   int C_firstCol = Bview.colMap->MinLID();
00524   int C_lastCol = Bview.colMap->MaxLID();
00525 
00526   int C_firstCol_import = 0;
00527   int C_lastCol_import = -1;
00528 
00529   if (Bview.importColMap != NULL) {
00530     C_firstCol_import = Bview.importColMap->MinLID();
00531     C_lastCol_import = Bview.importColMap->MaxLID();
00532   }
00533 
00534   int C_numCols = C_lastCol - C_firstCol + 1;
00535   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00536 
00537   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00538 
00539   double* C_row_i = new double[C_numCols];
00540   int* C_colInds = new int[C_numCols];
00541 
00542   int i, j, k;
00543 
00544   for(j=0; j<C_numCols; ++j) {
00545     C_row_i[j] = 0.0;
00546     C_colInds[j] = 0;
00547   }
00548 
00549   //To form C = A^T*B, compute a series of outer-product updates.
00550   //
00551   // for (ith column of A^T) { 
00552   //   C_i = outer product of A^T(:,i) and B(i,:)
00553   // Where C_i is the ith matrix update,
00554   //       A^T(:,i) is the ith column of A^T, and
00555   //       B(i,:) is the ith row of B.
00556   //
00557 
00558   //dumpCrsMatrixStruct(Aview);
00559   //dumpCrsMatrixStruct(Bview);
00560   int localProc = Bview.colMap->Comm().MyPID();
00561 
00562   int* Arows = Aview.rowMap->MyGlobalElements();
00563 
00564   bool C_filled = C.Filled();
00565 
00566   //loop over the rows of A (which are the columns of A^T).
00567   for(i=0; i<Aview.numRows; ++i) {
00568 
00569     int* Aindices_i = Aview.indices[i];
00570     double* Aval_i  = Aview.values[i];
00571 
00572     //we'll need to get the row of B corresponding to Arows[i],
00573     //where Arows[i] is the GID of A's ith row.
00574     int Bi = Bview.rowMap->LID(Arows[i]);
00575     if (Bi<0) {
00576       cout << "mult_Atrans_B ERROR, proc "<<localProc<<" needs row "
00577      <<Arows[i]<<" of matrix B, but doesn't have it."<<endl;
00578       return(-1);
00579     }
00580 
00581     int* Bcol_inds = Bview.indices[Bi];
00582     double* Bvals_i = Bview.values[Bi];
00583 
00584     //for each column-index Aj in the i-th row of A, we'll update
00585     //global-row GID(Aj) of the result matrix C. In that row of C,
00586     //we'll update column-indices given by the column-indices in the
00587     //ith row of B that we're now holding (Bcol_inds).
00588 
00589     //First create a list of GIDs for the column-indices
00590     //that we'll be updating.
00591 
00592     int Blen = Bview.numEntriesPerRow[Bi];
00593     if (Bview.remote[Bi]) {
00594       for(j=0; j<Blen; ++j) {
00595         C_colInds[j] = Bview.importColMap->GID(Bcol_inds[j]);
00596       }
00597     }
00598     else {
00599       for(j=0; j<Blen; ++j) {
00600         C_colInds[j] = Bview.colMap->GID(Bcol_inds[j]);
00601       }
00602     }
00603 
00604     //loop across the i-th row of A (column of A^T)
00605     for(j=0; j<Aview.numEntriesPerRow[i]; ++j) {
00606 
00607       int Aj = Aindices_i[j];
00608       double Aval = Aval_i[j];
00609 
00610       int global_row;
00611       if (Aview.remote[i]) {
00612   global_row = Aview.importColMap->GID(Aj);
00613       }
00614       else {
00615   global_row = Aview.colMap->GID(Aj);
00616       }
00617 
00618       if (!C.RowMap().MyGID(global_row)) {
00619         continue;
00620       }
00621 
00622       for(k=0; k<Blen; ++k) {
00623         C_row_i[k] = Aval*Bvals_i[k];
00624       }
00625 
00626       //
00627       //Now add this row-update to C.
00628       //
00629 
00630       int err = C_filled ?
00631         C.SumIntoGlobalValues(global_row, Blen, C_row_i, C_colInds)
00632         :
00633         C.InsertGlobalValues(global_row, Blen, C_row_i, C_colInds);
00634 
00635       if (err < 0) {
00636         return(err);
00637       }
00638       if (err > 0) {
00639         if (C_filled) {
00640           //C is Filled, and doesn't have all the necessary nonzero locations.
00641           return(err);
00642         }
00643       }
00644     }
00645   }
00646 
00647   delete [] C_row_i;
00648   delete [] C_colInds;
00649 
00650   return(0);
00651 }
00652 
00653 //kernel method for computing the local portion of C = A^T*B^T
00654 int mult_Atrans_Btrans(CrsMatrixStruct& Aview,
00655            CrsMatrixStruct& Bview,
00656            CrsWrapper& C)
00657 {
00658   int C_firstCol = Aview.rowMap->MinLID();
00659   int C_lastCol = Aview.rowMap->MaxLID();
00660 
00661   int C_firstCol_import = 0;
00662   int C_lastCol_import = -1;
00663 
00664   if (Aview.importColMap != NULL) {
00665     C_firstCol_import = Aview.importColMap->MinLID();
00666     C_lastCol_import = Aview.importColMap->MaxLID();
00667   }
00668 
00669   int C_numCols = C_lastCol - C_firstCol + 1;
00670   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00671 
00672   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00673 
00674   double* dwork = new double[C_numCols];
00675   int* iwork = new int[C_numCols];
00676 
00677   double* C_col_j = dwork;
00678   int* C_inds = iwork;
00679 
00680   //cout << "Aview: " << endl;
00681   //dumpCrsMatrixStruct(Aview);
00682 
00683   //cout << "Bview: " << endl;
00684   //dumpCrsMatrixStruct(Bview);
00685 
00686 
00687   int i, j, k;
00688 
00689   for(j=0; j<C_numCols; ++j) {
00690     C_col_j[j] = 0.0;
00691     C_inds[j] = -1;
00692   }
00693 
00694   int* A_col_inds = Aview.colMap->MyGlobalElements();
00695   int* A_col_inds_import = Aview.importColMap ?
00696     Aview.importColMap->MyGlobalElements() : 0;
00697 
00698   const Epetra_Map* Crowmap = &(C.RowMap());
00699 
00700   //To form C = A^T*B^T, we're going to execute this expression:
00701   //
00702   // C(i,j) = sum_k( A(k,i)*B(j,k) )
00703   //
00704   //Our goal, of course, is to navigate the data in A and B once, without
00705   //performing searches for column-indices, etc. In other words, we avoid
00706   //column-wise operations like the plague...
00707 
00708   int* Brows = Bview.rowMap->MyGlobalElements();
00709 
00710   //loop over the rows of B
00711   for(j=0; j<Bview.numRows; ++j) {
00712     int* Bindices_j = Bview.indices[j];
00713     double* Bvals_j = Bview.values[j];
00714 
00715     int global_col = Brows[j];
00716 
00717     //loop across columns in the j-th row of B and for each corresponding
00718     //row in A, loop across columns and accumulate product
00719     //A(k,i)*B(j,k) into our partial sum quantities in C_col_j. In other
00720     //words, as we stride across B(j,:), we use selected rows in A to
00721     //calculate updates for column j of the result matrix C.
00722 
00723     for(k=0; k<Bview.numEntriesPerRow[j]; ++k) {
00724 
00725       int bk = Bindices_j[k];
00726       double Bval = Bvals_j[k];
00727 
00728       int global_k;
00729       if (Bview.remote[j]) {
00730   global_k = Bview.importColMap->GID(bk);
00731       }
00732       else {
00733   global_k = Bview.colMap->GID(bk);
00734       }
00735 
00736       //get the corresponding row in A
00737       int ak = Aview.rowMap->LID(global_k);
00738       if (ak<0) {
00739   continue;
00740       }
00741 
00742       int* Aindices_k = Aview.indices[ak];
00743       double* Avals_k = Aview.values[ak];
00744 
00745       int C_len = 0;
00746 
00747       if (Aview.remote[ak]) {
00748   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00749     C_col_j[C_len] = Avals_k[i]*Bval;
00750           C_inds[C_len++] = A_col_inds_import[Aindices_k[i]];
00751   }
00752       }
00753       else {
00754   for(i=0; i<Aview.numEntriesPerRow[ak]; ++i) {
00755     C_col_j[C_len] = Avals_k[i]*Bval;
00756           C_inds[C_len++] = A_col_inds[Aindices_k[i]];
00757   }
00758       }
00759 
00760       //Now loop across the C_col_j values and put non-zeros into C.
00761 
00762       for(i=0; i < C_len ; ++i) {
00763   if (C_col_j[i] == 0.0) continue;
00764 
00765   int global_row = C_inds[i];
00766   if (!Crowmap->MyGID(global_row)) {
00767     continue;
00768   }
00769 
00770   int err = C.SumIntoGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00771 
00772   if (err < 0) {
00773     return(err);
00774   }
00775   else {
00776           if (err > 0) {
00777       err = C.InsertGlobalValues(global_row, 1, &(C_col_j[i]), &global_col);
00778       if (err < 0) {
00779               return(err);
00780             }
00781     }
00782   }
00783       }
00784 
00785     }
00786   }
00787 
00788   delete [] dwork;
00789   delete [] iwork;
00790 
00791   return(0);
00792 }
00793 
00794 int import_and_extract_views(const Epetra_CrsMatrix& M,
00795            const Epetra_Map& targetMap,
00796            CrsMatrixStruct& Mview)
00797 {
00798   //The goal of this method is to populate the 'Mview' struct with views of the
00799   //rows of M, including all rows that correspond to elements in 'targetMap'.
00800   //
00801   //If targetMap includes local elements that correspond to remotely-owned rows
00802   //of M, then those remotely-owned rows will be imported into
00803   //'Mview.importMatrix', and views of them will be included in 'Mview'.
00804 
00805   Mview.deleteContents();
00806 
00807   const Epetra_Map& Mrowmap = M.RowMap();
00808 
00809   int numProcs = Mrowmap.Comm().NumProc();
00810 
00811   Mview.numRows = targetMap.NumMyElements();
00812 
00813   int* Mrows = targetMap.MyGlobalElements();
00814 
00815   if (Mview.numRows > 0) {
00816     Mview.numEntriesPerRow = new int[Mview.numRows];
00817     Mview.indices = new int*[Mview.numRows];
00818     Mview.values = new double*[Mview.numRows];
00819     Mview.remote = new bool[Mview.numRows];
00820   }
00821 
00822   Mview.numRemote = 0;
00823 
00824   int i;
00825   for(i=0; i<Mview.numRows; ++i) {
00826     int mlid = Mrowmap.LID(Mrows[i]);
00827     if (mlid < 0) {
00828       Mview.remote[i] = true;
00829       ++Mview.numRemote;
00830     }
00831     else {
00832       EPETRA_CHK_ERR( M.ExtractMyRowView(mlid, Mview.numEntriesPerRow[i],
00833            Mview.values[i], Mview.indices[i]) );
00834       Mview.remote[i] = false;
00835     }
00836   }
00837 
00838   Mview.origRowMap = &(M.RowMap());
00839   Mview.rowMap = &targetMap;
00840   Mview.colMap = &(M.ColMap());
00841   Mview.domainMap = &(M.DomainMap());
00842   Mview.importColMap = NULL;
00843 
00844   if (numProcs < 2) {
00845     if (Mview.numRemote > 0) {
00846       cerr << "EpetraExt::MatrixMatrix::Multiply ERROR, numProcs < 2 but "
00847      << "attempting to import remote matrix rows."<<endl;
00848       return(-1);
00849     }
00850 
00851     //If only one processor we don't need to import any remote rows, so return.
00852     return(0);
00853   }
00854 
00855   //
00856   //Now we will import the needed remote rows of M, if the global maximum
00857   //value of numRemote is greater than 0.
00858   //
00859 
00860   int globalMaxNumRemote = 0;
00861   Mrowmap.Comm().MaxAll(&Mview.numRemote, &globalMaxNumRemote, 1);
00862 
00863   if (globalMaxNumRemote > 0) {
00864     //Create a map that describes the remote rows of M that we need.
00865 
00866     int* MremoteRows = Mview.numRemote>0 ? new int[Mview.numRemote] : NULL;
00867     int offset = 0;
00868     for(i=0; i<Mview.numRows; ++i) {
00869       if (Mview.remote[i]) {
00870   MremoteRows[offset++] = Mrows[i];
00871       }
00872     }
00873 
00874     Epetra_Map MremoteRowMap(-1, Mview.numRemote, MremoteRows,
00875            Mrowmap.IndexBase(), Mrowmap.Comm());
00876 
00877     //Create an importer with target-map MremoteRowMap and 
00878     //source-map Mrowmap.
00879     Epetra_Import importer(MremoteRowMap, Mrowmap);
00880 
00881     //Now create a new matrix into which we can import the remote rows of M
00882     //that we need.
00883     Mview.importMatrix = new Epetra_CrsMatrix(Copy, MremoteRowMap, 1);
00884 
00885     EPETRA_CHK_ERR( Mview.importMatrix->Import(M, importer, Insert) );
00886 
00887     EPETRA_CHK_ERR( Mview.importMatrix->FillComplete(M.DomainMap(), M.RangeMap()) );
00888 
00889     //Finally, use the freshly imported data to fill in the gaps in our views
00890     //of rows of M.
00891     for(i=0; i<Mview.numRows; ++i) {
00892       if (Mview.remote[i]) {
00893   int importLID = MremoteRowMap.LID(Mrows[i]);
00894   EPETRA_CHK_ERR( Mview.importMatrix->ExtractMyRowView(importLID,
00895               Mview.numEntriesPerRow[i],
00896               Mview.values[i],
00897               Mview.indices[i]) );
00898       }
00899     }
00900 
00901     Mview.importColMap = &(Mview.importMatrix->ColMap());
00902 
00903     delete [] MremoteRows;
00904   }
00905 
00906   return(0);
00907 }
00908 
00909 int form_map_union(const Epetra_Map* map1,
00910        const Epetra_Map* map2,
00911        const Epetra_Map*& mapunion)
00912 {
00913   //form the union of two maps
00914 
00915   if (map1 == NULL) {
00916     mapunion = new Epetra_Map(*map2);
00917     return(0);
00918   }
00919 
00920   if (map2 == NULL) {
00921     mapunion = new Epetra_Map(*map1);
00922     return(0);
00923   }
00924 
00925   int map1_len       = map1->NumMyElements();
00926   int* map1_elements = map1->MyGlobalElements();
00927   int map2_len       = map2->NumMyElements();
00928   int* map2_elements = map2->MyGlobalElements();
00929 
00930   int* union_elements = new int[map1_len+map2_len];
00931 
00932   int map1_offset = 0, map2_offset = 0, union_offset = 0;
00933 
00934   while(map1_offset < map1_len && map2_offset < map2_len) {
00935     int map1_elem = map1_elements[map1_offset];
00936     int map2_elem = map2_elements[map2_offset];
00937 
00938     if (map1_elem < map2_elem) {
00939       union_elements[union_offset++] = map1_elem;
00940       ++map1_offset;
00941     }
00942     else if (map1_elem > map2_elem) {
00943       union_elements[union_offset++] = map2_elem;
00944       ++map2_offset;
00945     }
00946     else {
00947       union_elements[union_offset++] = map1_elem;
00948       ++map1_offset;
00949       ++map2_offset;
00950     }
00951   }
00952 
00953   int i;
00954   for(i=map1_offset; i<map1_len; ++i) {
00955     union_elements[union_offset++] = map1_elements[i];
00956   }
00957 
00958   for(i=map2_offset; i<map2_len; ++i) {
00959     union_elements[union_offset++] = map2_elements[i];
00960   }
00961 
00962   mapunion = new Epetra_Map(-1, union_offset, union_elements,
00963           map1->IndexBase(), map1->Comm());
00964 
00965   delete [] union_elements;
00966 
00967   return(0);
00968 }
00969 
00970 Epetra_Map* find_rows_containing_cols(const Epetra_CrsMatrix& M,
00971                                       const Epetra_Map& column_map)
00972 {
00973   //The goal of this function is to find all rows in the matrix M that contain
00974   //column-indices which are in 'column_map'. A map containing those rows is
00975   //returned. (The returned map will then be used as the source row-map for
00976   //importing those rows into an overlapping distribution.)
00977 
00978   std::pair<int,int> my_col_range = get_col_range(column_map);
00979 
00980   const Epetra_Comm& Comm = M.RowMap().Comm();
00981   int num_procs = Comm.NumProc();
00982   int my_proc = Comm.MyPID();
00983   std::vector<int> send_procs;
00984   send_procs.reserve(num_procs-1);
00985   std::vector<int> col_ranges;
00986   col_ranges.reserve(2*(num_procs-1));
00987   for(int p=0; p<num_procs; ++p) {
00988     if (p == my_proc) continue;
00989     send_procs.push_back(p);
00990     col_ranges.push_back(my_col_range.first);
00991     col_ranges.push_back(my_col_range.second);
00992   }
00993 
00994   Epetra_Distributor* distor = Comm.CreateDistributor();
00995 
00996   int num_recv_procs = 0;
00997   int num_send_procs = send_procs.size();
00998   bool deterministic = true;
00999   int* send_procs_ptr = send_procs.size()>0 ? &send_procs[0] : NULL;
01000   distor->CreateFromSends(num_send_procs, send_procs_ptr, deterministic, num_recv_procs);
01001 
01002   int len_import_chars = 0;
01003   char* import_chars = NULL;
01004 
01005   char* export_chars = col_ranges.size()>0 ? reinterpret_cast<char*>(&col_ranges[0]) : NULL;
01006   int err = distor->Do(export_chars, 2*sizeof(int), len_import_chars, import_chars);
01007   if (err != 0) {
01008     std::cout << "ERROR returned from Distributor::Do."<<std::endl;
01009   }
01010  
01011   int* IntImports = reinterpret_cast<int*>(import_chars);
01012   int num_import_pairs = len_import_chars/(2*sizeof(int));
01013   int offset = 0;
01014   std::vector<int> send_procs2;
01015   send_procs2.reserve(num_procs);
01016   std::vector<int> proc_col_ranges;
01017   std::pair<int,int> M_col_range = get_col_range(M);
01018   for(int i=0; i<num_import_pairs; ++i) {
01019     int first_col = IntImports[offset++];
01020     int last_col =  IntImports[offset++];
01021     if (first_col <= M_col_range.second && last_col >= M_col_range.first) {
01022       send_procs2.push_back(send_procs[i]);
01023       proc_col_ranges.push_back(first_col);
01024       proc_col_ranges.push_back(last_col);
01025     }
01026   }
01027 
01028   std::vector<int> send_rows;
01029   std::vector<int> rows_per_send_proc;
01030   pack_outgoing_rows(M, proc_col_ranges, send_rows, rows_per_send_proc);
01031 
01032   Epetra_Distributor* distor2 = Comm.CreateDistributor();
01033 
01034   int* send_procs2_ptr = send_procs2.size()>0 ? &send_procs2[0] : NULL;
01035   num_recv_procs = 0;
01036   err = distor2->CreateFromSends(send_procs2.size(), send_procs2_ptr, deterministic, num_recv_procs);
01037 
01038   export_chars = send_rows.size()>0 ? reinterpret_cast<char*>(&send_rows[0]) : NULL;
01039   int* rows_per_send_proc_ptr = rows_per_send_proc.size()>0 ? &rows_per_send_proc[0] : NULL;
01040   len_import_chars = 0;
01041   err = distor2->Do(export_chars, (int)sizeof(int), rows_per_send_proc_ptr, len_import_chars, import_chars);
01042 
01043   int* recvd_row_ints = reinterpret_cast<int*>(import_chars);
01044   int num_recvd_rows = len_import_chars/sizeof(int);
01045 
01046   Epetra_Map recvd_rows(-1, num_recvd_rows, recvd_row_ints, column_map.IndexBase(), Comm);
01047 
01048   delete distor;
01049   delete distor2;
01050   delete [] import_chars;
01051 
01052   Epetra_Map* result_map = NULL;
01053 
01054   err = form_map_union(&(M.RowMap()), &recvd_rows, (const Epetra_Map*&)result_map);
01055   if (err != 0) {
01056     return(NULL);
01057   }
01058 
01059   return(result_map);
01060 }
01061 
01062 int MatrixMatrix::Multiply(const Epetra_CrsMatrix& A,
01063          bool transposeA,
01064          const Epetra_CrsMatrix& B,
01065          bool transposeB,
01066          Epetra_CrsMatrix& C,
01067                            bool call_FillComplete_on_result)
01068 {
01069   //
01070   //This method forms the matrix-matrix product C = op(A) * op(B), where
01071   //op(A) == A   if transposeA is false,
01072   //op(A) == A^T if transposeA is true,
01073   //and similarly for op(B).
01074   //
01075 
01076   //A and B should already be Filled.
01077   //(Should we go ahead and call FillComplete() on them if necessary?
01078   // or error out? For now, we choose to error out.)
01079   if (!A.Filled() || !B.Filled()) {
01080     EPETRA_CHK_ERR(-1);
01081   }
01082 
01083   //We're going to refer to the different combinations of op(A) and op(B)
01084   //as scenario 1 through 4.
01085 
01086   int scenario = 1;//A*B
01087   if (transposeB && !transposeA) scenario = 2;//A*B^T
01088   if (transposeA && !transposeB) scenario = 3;//A^T*B
01089   if (transposeA && transposeB)  scenario = 4;//A^T*B^T
01090 
01091   //now check size compatibility
01092   int Aouter = transposeA ? A.NumGlobalCols() : A.NumGlobalRows();
01093   int Bouter = transposeB ? B.NumGlobalRows() : B.NumGlobalCols();
01094   int Ainner = transposeA ? A.NumGlobalRows() : A.NumGlobalCols();
01095   int Binner = transposeB ? B.NumGlobalCols() : B.NumGlobalRows();
01096   if (Ainner != Binner) {
01097     cerr << "MatrixMatrix::Multiply: ERROR, inner dimensions of op(A) and op(B) "
01098          << "must match for matrix-matrix product. op(A) is "
01099          <<Aouter<<"x"<<Ainner << ", op(B) is "<<Binner<<"x"<<Bouter<<endl;
01100     return(-1);
01101   }
01102 
01103   //The result matrix C must at least have a row-map that reflects the
01104   //correct row-size. Don't check the number of columns because rectangular
01105   //matrices which were constructed with only one map can still end up
01106   //having the correct capacity and dimensions when filled.
01107   if (Aouter > C.NumGlobalRows()) {
01108     cerr << "MatrixMatrix::Multiply: ERROR, dimensions of result C must "
01109          << "match dimensions of op(A) * op(B). C has "<<C.NumGlobalRows()
01110          << " rows, should have at least "<<Aouter << endl;
01111     return(-1);
01112   }
01113 
01114   //It doesn't matter whether C is already Filled or not. If it is already
01115   //Filled, it must have space allocated for the positions that will be
01116   //referenced in forming C = op(A)*op(B). If it doesn't have enough space,
01117   //we'll error out later when trying to store result values.
01118 
01119   //We're going to need to import remotely-owned sections of A and/or B
01120   //if more than 1 processor is performing this run, depending on the scenario.
01121   int numProcs = A.Comm().NumProc();
01122 
01123   //If we are to use the transpose of A and/or B, we'll need to be able to 
01124   //access, on the local processor, all rows that contain column-indices in
01125   //the domain-map.
01126   const Epetra_Map* domainMap_A = &(A.DomainMap());
01127   const Epetra_Map* domainMap_B = &(B.DomainMap());
01128 
01129   const Epetra_Map* rowmap_A = &(A.RowMap());
01130   const Epetra_Map* rowmap_B = &(B.RowMap());
01131 
01132   //Declare some 'work-space' maps which may be created depending on
01133   //the scenario, and which will be deleted before exiting this function.
01134   const Epetra_Map* workmap1 = NULL;
01135   const Epetra_Map* workmap2 = NULL;
01136   const Epetra_Map* mapunion1 = NULL;
01137 
01138   //Declare a couple of structs that will be used to hold views of the data
01139   //of A and B, to be used for fast access during the matrix-multiplication.
01140   CrsMatrixStruct Aview;
01141   CrsMatrixStruct Bview;
01142 
01143   const Epetra_Map* targetMap_A = rowmap_A;
01144   const Epetra_Map* targetMap_B = rowmap_B;
01145 
01146   if (numProcs > 1) {
01147     //If op(A) = A^T, find all rows of A that contain column-indices in the
01148     //local portion of the domain-map. (We'll import any remote rows
01149     //that fit this criteria onto the local processor.)
01150     if (transposeA) {
01151       workmap1 = find_rows_containing_cols(A, *domainMap_A);
01152       targetMap_A = workmap1;
01153     }
01154   }
01155 
01156   //Now import any needed remote rows and populate the Aview struct.
01157   EPETRA_CHK_ERR( import_and_extract_views(A, *targetMap_A, Aview) );
01158 
01159 
01160   // Make sure B's views are consistent with A even in serial for A*B (scenario 1)
01161   if(scenario==1){
01162     targetMap_B = &(A.ColMap());
01163   }
01164 
01165   if (numProcs > 1) {
01166     // Get the target map
01167     const Epetra_Map* colmap_op_A = NULL;
01168     if (transposeA) {
01169       colmap_op_A = targetMap_A;
01170     }
01171     else {
01172       colmap_op_A = &(A.ColMap());
01173     }
01174     targetMap_B = colmap_op_A;
01175 
01176 
01177     //If op(B) = B^T, find all rows of B that contain column-indices in the
01178     //local-portion of the domain-map, or in the column-map of op(A).
01179     //We'll import any remote rows that fit this criteria onto the
01180     //local processor.
01181     if (transposeB) {
01182       EPETRA_CHK_ERR( form_map_union(colmap_op_A, domainMap_B, mapunion1) );
01183       workmap2 = find_rows_containing_cols(B, *mapunion1);
01184       targetMap_B = workmap2;
01185     }
01186   }
01187 
01188   //Now import any needed remote rows and populate the Bview struct.
01189   EPETRA_CHK_ERR( import_and_extract_views(B, *targetMap_B, Bview) );
01190 
01191   // Zero if filled
01192   if(C.Filled()) C.PutScalar(0.0);
01193 
01194   //Now call the appropriate method to perform the actual multiplication.
01195   CrsWrapper_Epetra_CrsMatrix ecrsmat(C);
01196 
01197   switch(scenario) {
01198   case 1:    EPETRA_CHK_ERR( mult_A_B(Aview, Bview, ecrsmat) );
01199     break;
01200   case 2:    EPETRA_CHK_ERR( mult_A_Btrans(Aview, Bview, ecrsmat) );
01201     break;
01202   case 3:    EPETRA_CHK_ERR( mult_Atrans_B(Aview, Bview, ecrsmat) );
01203     break;
01204   case 4:    EPETRA_CHK_ERR( mult_Atrans_Btrans(Aview, Bview, ecrsmat) );
01205     break;
01206   }
01207 
01208   if (call_FillComplete_on_result) {
01209     //We'll call FillComplete on the C matrix before we exit, and give
01210     //it a domain-map and a range-map.
01211     //The domain-map will be the domain-map of B, unless
01212     //op(B)==transpose(B), in which case the range-map of B will be used.
01213     //The range-map will be the range-map of A, unless
01214     //op(A)==transpose(A), in which case the domain-map of A will be used.
01215 
01216     const Epetra_Map* domainmap =
01217       transposeB ? &(B.RangeMap()) : &(B.DomainMap());
01218 
01219     const Epetra_Map* rangemap =
01220       transposeA ? &(A.DomainMap()) : &(A.RangeMap());
01221 
01222     if (!C.Filled()) {
01223       EPETRA_CHK_ERR( C.FillComplete(*domainmap, *rangemap) );
01224     }
01225   }
01226 
01227   //Finally, delete the objects that were potentially created
01228   //during the course of importing remote sections of A and B.
01229 
01230   delete mapunion1; mapunion1 = NULL;
01231   delete workmap1; workmap1 = NULL;
01232   delete workmap2; workmap2 = NULL;
01233 
01234   return(0);
01235 }
01236 
01237 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01238                       bool transposeA,
01239                       double scalarA,
01240                       Epetra_CrsMatrix& B,
01241                       double scalarB )
01242 {
01243   //
01244   //This method forms the matrix-matrix sum B = scalarA * op(A) + scalarB * B, where
01245 
01246   //A should already be Filled. It doesn't matter whether B is
01247   //already Filled, but if it is, then its graph must already contain
01248   //all nonzero locations that will be referenced in forming the
01249   //sum.
01250 
01251   if (!A.Filled() ) {
01252     std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() is false, it is required to be true. (Result matrix B is not required to be Filled())."<<std::endl;
01253     EPETRA_CHK_ERR(-1);
01254   }
01255 
01256   //explicit tranpose A formed as necessary
01257   Epetra_CrsMatrix * Aprime = 0;
01258   EpetraExt::RowMatrix_Transpose * Atrans = 0;
01259   if( transposeA )
01260   {
01261     Atrans = new EpetraExt::RowMatrix_Transpose();
01262     Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01263   }
01264   else
01265     Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01266 
01267   int MaxNumEntries = EPETRA_MAX( A.MaxNumEntries(), B.MaxNumEntries() );
01268   int A_NumEntries, B_NumEntries;
01269   int * A_Indices = new int[MaxNumEntries];
01270   double * A_Values = new double[MaxNumEntries];
01271   int* B_Indices;
01272   double* B_Values;
01273 
01274   int NumMyRows = B.NumMyRows();
01275   int Row, err;
01276   int ierr = 0;
01277 
01278   if( scalarA )
01279   {
01280     //Loop over B's rows and sum into
01281     for( int i = 0; i < NumMyRows; ++i )
01282     {
01283       Row = B.GRID(i);
01284       EPETRA_CHK_ERR( Aprime->ExtractGlobalRowCopy( Row, MaxNumEntries, A_NumEntries, A_Values, A_Indices ) );
01285 
01286       if (scalarB != 1.0) {
01287         if (!B.Filled()) {
01288           EPETRA_CHK_ERR( B.ExtractGlobalRowView( Row, B_NumEntries,
01289                                                   B_Values, B_Indices));
01290         }
01291         else {
01292           EPETRA_CHK_ERR( B.ExtractMyRowView( i, B_NumEntries,
01293                                               B_Values, B_Indices));
01294         }
01295 
01296         for(int jj=0; jj<B_NumEntries; ++jj) {
01297           B_Values[jj] = scalarB*B_Values[jj];
01298         }
01299       }
01300 
01301       if( scalarA != 1.0 ) {
01302         for( int j = 0; j < A_NumEntries; ++j ) A_Values[j] *= scalarA;
01303       }
01304 
01305       if( B.Filled() ) {//Sum In Values
01306         err = B.SumIntoGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01307         assert( err >= 0 );
01308         if (err < 0) ierr = err;
01309       }
01310       else {
01311         err = B.InsertGlobalValues( Row, A_NumEntries, A_Values, A_Indices );
01312         assert( err == 0 || err == 1 || err == 3 );
01313         if (err < 0) ierr = err;
01314       }
01315     }
01316   }
01317   else {
01318     EPETRA_CHK_ERR( B.Scale(scalarB) );
01319   }
01320 
01321   delete [] A_Indices;
01322   delete [] A_Values;
01323 
01324   if( Atrans ) delete Atrans;
01325 
01326   return(ierr);
01327 }
01328 
01329 int MatrixMatrix::Add(const Epetra_CrsMatrix& A,
01330                       bool transposeA,
01331                       double scalarA,
01332                       const Epetra_CrsMatrix & B,
01333                       bool transposeB,
01334                       double scalarB,
01335                       Epetra_CrsMatrix * & C)
01336 {
01337   //
01338   //This method forms the matrix-matrix sum C = scalarA * op(A) + scalarB * op(B), where
01339 
01340   //A and B should already be Filled. C should be an empty pointer.
01341 
01342   if (!A.Filled() || !B.Filled() ) {
01343      std::cerr << "EpetraExt::MatrixMatrix::Add ERROR, input matrix A.Filled() or B.Filled() is false,"
01344                << "they are required to be true. (Result matrix C should be an empty pointer)" << std::endl;
01345      EPETRA_CHK_ERR(-1);
01346   }
01347 
01348   Epetra_CrsMatrix * Aprime = 0, * Bprime=0;
01349   EpetraExt::RowMatrix_Transpose * Atrans = 0,* Btrans = 0;
01350 
01351   //explicit tranpose A formed as necessary
01352   if( transposeA ) {
01353      Atrans = new EpetraExt::RowMatrix_Transpose();
01354      Aprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Atrans)(const_cast<Epetra_CrsMatrix&>(A)))));
01355   }
01356   else
01357      Aprime = const_cast<Epetra_CrsMatrix*>(&A);
01358 
01359   //explicit tranpose B formed as necessary
01360   if( transposeB ) {
01361      Btrans = new EpetraExt::RowMatrix_Transpose();
01362      Bprime = &(dynamic_cast<Epetra_CrsMatrix&>(((*Btrans)(const_cast<Epetra_CrsMatrix&>(B)))));
01363   }
01364   else
01365      Bprime = const_cast<Epetra_CrsMatrix*>(&B);
01366 
01367   // allocate or zero the new matrix
01368   if(C!=0)
01369      C->PutScalar(0.0);
01370   else
01371      C = new Epetra_CrsMatrix(Copy,Aprime->RowMap(),0);
01372 
01373   // build arrays  for easy resuse
01374   int ierr = 0;
01375   Epetra_CrsMatrix * Mat[] = { Aprime,Bprime};
01376   double scalar[] = { scalarA, scalarB};
01377 
01378   // do a loop over each matrix to add: A reordering might be more efficient
01379   for(int k=0;k<2;k++) {
01380      int MaxNumEntries = Mat[k]->MaxNumEntries();
01381      int NumEntries;
01382      int * Indices = new int[MaxNumEntries];
01383      double * Values = new double[MaxNumEntries];
01384    
01385      int NumMyRows = Mat[k]->NumMyRows();
01386      int Row, err;
01387      int ierr = 0;
01388    
01389      //Loop over rows and sum into C
01390      for( int i = 0; i < NumMyRows; ++i ) {
01391         Row = Mat[k]->GRID(i);
01392         EPETRA_CHK_ERR( Mat[k]->ExtractGlobalRowCopy( Row, MaxNumEntries, NumEntries, Values, Indices));
01393    
01394         if( scalar[k] != 1.0 )
01395            for( int j = 0; j < NumEntries; ++j ) Values[j] *= scalar[k];
01396    
01397         if(C->Filled()) { // Sum in values
01398            err = C->SumIntoGlobalValues( Row, NumEntries, Values, Indices );
01399            if (err < 0) ierr = err;
01400         } else { // just add it to the unfilled CRS Matrix
01401            err = C->InsertGlobalValues( Row, NumEntries, Values, Indices );
01402            if (err < 0) ierr = err;
01403         }
01404      }
01405 
01406      delete [] Indices;
01407      delete [] Values;
01408   }
01409 
01410   if( Atrans ) delete Atrans;
01411   if( Btrans ) delete Btrans;
01412 
01413   return(ierr);
01414 }
01415 
01416 } // namespace EpetraExt
01417 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines