EpetraExt Package Browser (Single Doxygen Collection) Development
EpetraExt_MatrixMatrix_mult_A_B.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ***********************************************************************
00003 //
00004 //     EpetraExt: Epetra Extended - Linear Algebra Services Package
00005 //                 Copyright (2011) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ***********************************************************************
00040 //@HEADER
00041 
00042 #include <EpetraExt_ConfigDefs.h>
00043 #include <EpetraExt_MatrixMatrix.h>
00044 
00045 #include <EpetraExt_MMHelpers.h>
00046 
00047 #include <EpetraExt_Transpose_RowMatrix.h>
00048 
00049 #include <Epetra_Export.h>
00050 #include <Epetra_Import.h>
00051 #include <Epetra_Util.h>
00052 #include <Epetra_Map.h>
00053 #include <Epetra_Comm.h>
00054 #include <Epetra_CrsMatrix.h>
00055 #include <Epetra_Vector.h>
00056 #include <Epetra_Directory.h>
00057 #include <Epetra_HashTable.h>
00058 #include <Epetra_Distributor.h>
00059 #include <Epetra_IntSerialDenseVector.h>
00060 
00061 #ifdef HAVE_VECTOR
00062 #include <vector>
00063 #endif
00064 
00065 #ifdef HAVE_MPI
00066 #include <Epetra_MpiDistributor.h>
00067 #endif
00068 
00069 
00070 #include <Teuchos_TimeMonitor.hpp>
00071 
00072 namespace EpetraExt {
00073 
00074 /*****************************************************************************/
00075 /*****************************************************************************/
00076 /*****************************************************************************/
00077 static inline int C_estimate_nnz(const Epetra_CrsMatrix & A, const Epetra_CrsMatrix &B){
00078   // Follows the NZ estimate in ML's ml_matmatmult.c
00079   int Aest=(A.NumMyRows()>0)? A.NumMyNonzeros()/A.NumMyRows():100;
00080   int Best=(B.NumMyRows()>0)? B.NumMyNonzeros()/B.NumMyRows():100;
00081 
00082   int nnzperrow=(int)(sqrt((double)Aest) + sqrt((double)Best) - 1);
00083   nnzperrow*=nnzperrow;
00084  
00085   return (int)(A.NumMyRows()*nnzperrow*0.75 + 100);
00086 }
00087 
00088 /*****************************************************************************/
00089 /*****************************************************************************/
00090 /*****************************************************************************/
00091 static inline int auto_resize(std::vector<int> &x,int num_new){
00092   int newsize=x.size() + EPETRA_MAX((int)x.size(),num_new);
00093   x.resize(newsize);
00094   return newsize;
00095 }
00096 
00097 
00098 /*****************************************************************************/
00099 /*****************************************************************************/
00100 /*****************************************************************************/
00101 template<typename int_type>
00102 int aztecoo_and_ml_compatible_map_union(const Epetra_CrsMatrix &B, const LightweightCrsMatrix &Bimport, Epetra_Map*& unionmap, std::vector<int>& Cremotepids,
00103           std::vector<int> &Bcols2Ccols, std::vector<int> &Icols2Ccols)
00104 {
00105 #ifdef HAVE_MPI
00106  
00107 #ifdef ENABLE_MMM_TIMINGS
00108   using Teuchos::TimeMonitor;
00109   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 1")));
00110 #endif
00111 
00112  // So we need to merge the ColMap of B and the TargetMap of Bimport in an AztecOO/Ifpack/ML compliant order.
00113   Epetra_Util util;
00114   int i,j,MyPID = B.Comm().MyPID(), NumProc = B.Comm().NumProc();
00115   int Bstart=0, Istart=0, Cstart=0,Pstart=0;
00116 
00117   const Epetra_Map & BColMap       = B.ColMap();
00118   const Epetra_Map & DomainMap     = B.DomainMap();
00119   const LightweightMap & IColMap   = Bimport.ColMap_;
00120 
00121   int Nb         = BColMap.NumMyElements();
00122   int_type * Bgids = 0;
00123   BColMap.MyGlobalElementsPtr(Bgids);
00124   int Ni         = IColMap.NumMyElements();
00125   int_type * Igids    = 0;
00126   if(Ni>0)
00127     IColMap.MyGlobalElementsPtr(Igids);
00128 
00129   if((int)Bcols2Ccols.size() != Nb) Bcols2Ccols.resize(Nb);
00130   if((int)Icols2Ccols.size() != Ni) Icols2Ccols.resize(Ni);
00131 
00132   // Since we're getting called, we know we have to be using an MPI implementation of Epetra.  
00133   // Which means we should have an MpiDistributor for both B and Bimport.  
00134   // Unless all of B's columns are owned by the calling proc (e.g. MueLu for A*Ptent w/ uncoupled aggregation)
00135   Epetra_MpiDistributor *Distor=0;
00136   if(B.Importer()) { 
00137     Distor=dynamic_cast<Epetra_MpiDistributor*>(&B.Importer()->Distributor());
00138     if(!Distor) EPETRA_CHK_ERR(-2);
00139   }
00140 
00141   // **********************
00142   // Stage 1: Get the owning PIDs
00143   // **********************
00144   // Note: if B doesn't have an importer, the calling proc owns all its colids...
00145   std::vector<int> Bpids(Nb), Ipids(Ni);
00146   if(B.Importer()) {EPETRA_CHK_ERR(util.GetPids(*B.Importer(),Bpids,true));}
00147   else Bpids.assign(Nb,-1);
00148 
00149   if(Ni != (int) Bimport.ColMapOwningPIDs_.size()) {
00150     EPETRA_CHK_ERR(-21);
00151   }
00152   for(i=0;i<Ni;i++){
00153     Ipids[i] = (Bimport.ColMapOwningPIDs_[i]==MyPID)?(-1):(Bimport.ColMapOwningPIDs_[i]);    
00154   }
00155 
00156   // **********************
00157   // Stage 2: Allocate memory (make things too big)
00158   // **********************
00159   int Csize=Nb+Ni;
00160   int Psize=Nb+Ni;
00161   std::vector<int_type> Cgids(Csize);
00162   Cremotepids.resize(Psize);
00163 
00164 #ifdef ENABLE_MMM_TIMINGS
00165   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 2")));
00166 #endif
00167 
00168   // **********************
00169   // Stage 3: Local Unknowns
00170   // **********************
00171   if(!B.Importer() || (B.Importer()->NumSameIDs() == DomainMap.NumMyElements())) {
00172     // B's colmap has all of the domain elements.  We can just copy those into the start of the array.
00173     DomainMap.MyGlobalElements(&Cgids[0]);
00174     Cstart=DomainMap.NumMyElements();
00175     Bstart=DomainMap.NumMyElements();
00176 
00177     for(i=0; i<DomainMap.NumMyElements(); i++) Bcols2Ccols[i] = i;
00178   }
00179   else {
00180     // There are more entries in the DomainMap than B's ColMap.  So we stream through both B and Bimport for the copy.
00181     int NumDomainElements     = DomainMap.NumMyElements();
00182     for(i = 0; i < NumDomainElements; i++) {      
00183       int_type GID = (int_type) DomainMap.GID64(i);
00184       int LID = BColMap.LID(GID);
00185       // B has this guy
00186       if(LID!=-1) {
00187   Bcols2Ccols[LID]=Cstart;
00188   Cgids[Cstart] = GID;
00189   Cstart++;
00190   Bstart++;
00191       }
00192       else {
00193   // B import has this guy
00194   LID = IColMap.LID(GID);
00195   if(LID!=-1) {
00196     Icols2Ccols[LID]=Cstart;
00197     Cgids[Cstart] = GID;
00198     Cstart++;
00199   } 
00200       }
00201     }
00202   }
00203 
00204   // Now advance Ilast to the last owned unknown in Bimport
00205   for(i=0,j=0; i<Ni && Ipids[i]==-1; i++) {
00206     while(Cgids[j]!=Igids[i]) j++;
00207     Icols2Ccols[i]=j;
00208   }
00209   Istart=i;
00210 
00211 
00212 #ifdef ENABLE_MMM_TIMINGS
00213   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 3")));
00214 #endif
00215 
00216 
00217   // **********************
00218   // Stage 4: Processor-by-processor set_union
00219   // **********************
00220   // NOTE: Intial sizes for Btemp/Itemp from B's distributor.  This should be exact for Btemp and a decent guess for Itemp
00221   int initial_temp_length = 0;
00222   const int * lengths_from=0;
00223   if(Distor) {
00224     lengths_from= Distor->LengthsFrom();
00225     for(i=0; i < Distor->NumReceives(); i++) initial_temp_length += lengths_from[i];
00226   }
00227   else initial_temp_length=100;
00228 
00229   std::vector<int_type> Btemp(initial_temp_length),  Itemp(initial_temp_length);
00230   std::vector<int> Btemp2(initial_temp_length), Itemp2(initial_temp_length);
00231   
00232 
00233   while (Bstart < Nb || Istart < Ni) {
00234     int Bproc=NumProc+1, Iproc=NumProc+1, Cproc;
00235 
00236     // Find the next active processor ID
00237     if(Bstart < Nb) Bproc=Bpids[Bstart];
00238     if(Istart < Ni) Iproc=Ipids[Istart];
00239 
00240     Cproc = (Bproc < Iproc)?Bproc:Iproc;
00241     
00242     if(Bproc == Cproc && Iproc != Cproc) {
00243       // Only B has this processor.  Copy the data.
00244       // B: Find the beginning of the next processor
00245       for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
00246       int Bnext=i;
00247       
00248       // Copy data to C
00249       int tCsize = Bnext-Bstart;
00250       if(Btemp.size() < (size_t)tCsize) {Btemp2.resize(tCsize);}
00251 
00252       for(i=Bstart; i<Bnext; i++) {
00253   Cremotepids[i-Bstart+Pstart] = Cproc;
00254   Cgids[i-Bstart+Cstart]       = Bgids[i]; 
00255   Btemp2[i-Bstart]             = i;
00256       }
00257 
00258       // Sort & record reindexing
00259       int *Bptr2 = &Btemp2[0]; 
00260       util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Bptr2, 0, 0);
00261 
00262       for(i=0, j=Cstart; i<tCsize; i++){
00263   while(Cgids[j] != Bgids[Btemp2[i]]) j++;
00264   Bcols2Ccols[Btemp2[i]] =  j;  
00265       }
00266       Cstart+=tCsize;
00267       Pstart+=tCsize;
00268       Bstart=Bnext;
00269     }
00270     else if(Bproc != Cproc && Iproc == Cproc) {
00271       // Only I has this processor.  Copy the data.
00272       // I: Find the beginning of the next processor
00273       for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
00274       int Inext=i;
00275 
00276       // Copy data to C
00277       int tCsize = Inext-Istart;
00278       if(Itemp.size() < (size_t)tCsize) {Itemp2.resize(tCsize);}
00279 
00280       for(i=Istart; i<Inext; i++) {
00281   Cremotepids[i-Istart+Pstart] = Cproc;
00282   Cgids[i-Istart+Cstart]       = Igids[i]; 
00283   Itemp2[i-Istart]             = i;
00284       }
00285 
00286       // Sort & record reindexing
00287       int *Iptr2 = &Itemp2[0]; 
00288       util.Sort(true, tCsize, &Cgids[Cstart], 0, 0, 1, &Iptr2, 0, 0);
00289 
00290       for(i=0, j=Cstart; i<tCsize; i++){
00291   while(Cgids[j] != Igids[Itemp2[i]]) j++;
00292   Icols2Ccols[Itemp2[i]] =  j;  
00293       }
00294       Cstart+=tCsize;
00295       Pstart+=tCsize;
00296       Istart=Inext;
00297     }
00298     else {
00299       // Both B and I have this processor, so we need to do a set_union.  So we need to sort.
00300       int Bnext, Inext;
00301       // B: Find the beginning of the next processor
00302       for(i=Bstart; i<Nb && Bpids[i]==Bproc; i++) {}
00303       Bnext=i;
00304 
00305       // I: Find the beginning of the next processor
00306       for(i=Istart; i<Ni && Ipids[i]==Iproc; i++) {}
00307       Inext=i;
00308 
00309       // Copy data to temp
00310       int tBsize = Bnext-Bstart;
00311       int tIsize = Inext-Istart;
00312       //      int tCsize = tBsize+tIsize;
00313       if(Btemp.size() < (size_t)tBsize) {Btemp.resize(tBsize); Btemp2.resize(tBsize);}
00314       if(Itemp.size() < (size_t)tIsize) {Itemp.resize(tIsize); Itemp2.resize(tIsize);}
00315 
00316       for(i=Bstart; i<Bnext; i++) {Btemp[i-Bstart]=Bgids[i]; Btemp2[i-Bstart]=i;}
00317       for(i=Istart; i<Inext; i++) {Itemp[i-Istart]=Igids[i]; Itemp2[i-Istart]=i;}
00318 
00319       // Sort & set_union
00320       int *Bptr2 = &Btemp2[0]; int *Iptr2 = &Itemp2[0];
00321       util.Sort(true, tBsize, &Btemp[0], 0, 0, 1, &Bptr2, 0, 0);
00322       util.Sort(true, tIsize, &Itemp[0], 0, 0, 1, &Iptr2, 0, 0);
00323       typename std::vector<int_type>::iterator mycstart = Cgids.begin()+Cstart;
00324       typename std::vector<int_type>::iterator last_el=std::set_union(Btemp.begin(),Btemp.begin()+tBsize,Itemp.begin(),Itemp.begin()+tIsize,mycstart);
00325 
00326       for(i=0, j=Cstart; i<tBsize; i++){
00327   while(Cgids[j] != Bgids[Btemp2[i]]) j++;
00328   Bcols2Ccols[Btemp2[i]] =  j;  
00329       }
00330  
00331       for(i=0, j=Cstart; i<tIsize; i++){
00332   while(Cgids[j] != Igids[Itemp2[i]]) j++;
00333   Icols2Ccols[Itemp2[i]] =  j;  
00334       }
00335      
00336       for(i=Pstart; i<(last_el - mycstart) + Pstart; i++) Cremotepids[i]=Cproc;
00337       Cstart = (last_el - mycstart) + Cstart;
00338       Pstart = (last_el - mycstart) + Pstart;
00339       Bstart=Bnext;
00340       Istart=Inext;
00341     }
00342   } // end while
00343 
00344 #ifdef ENABLE_MMM_TIMINGS
00345   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM M5 CMap 4")));
00346 #endif
00347 
00348   // Resize the RemotePIDs down
00349   Cremotepids.resize(Pstart);
00350 
00351   // **********************
00352   // Stage 5: Call constructor
00353   // **********************
00354   // Make the map
00355   unionmap=new Epetra_Map((int_type) -1,Cstart,&Cgids[0], (int_type) B.ColMap().IndexBase64(),
00356     B.Comm(),B.ColMap().DistributedGlobal(),(int_type) B.ColMap().MinAllGID64(),(int_type) B.ColMap().MaxAllGID64());
00357   return 0;
00358 #else
00359   return -1;
00360 #endif
00361   }
00362 
00363 /*****************************************************************************/
00364 /*****************************************************************************/
00365 /*****************************************************************************/
00366 // Provide a "resize" operation for double*'s. 
00367 inline void resize_doubles(int nold,int nnew,double*& d){
00368   if(nnew > nold){
00369     double *tmp = new double[nnew];
00370     for(int i=0; i<nold; i++)
00371       tmp[i]=d[i];
00372     delete [] d;
00373     d=tmp;
00374   }
00375 }
00376 
00377 
00378 /*****************************************************************************/
00379 /*****************************************************************************/
00380 /*****************************************************************************/
00381 template<typename int_type>
00382 int  mult_A_B_newmatrix(const Epetra_CrsMatrix & A,
00383       const Epetra_CrsMatrix & B,
00384       const CrsMatrixStruct& Bview,
00385       std::vector<int> & Bcol2Ccol,
00386       std::vector<int> & Bimportcol2Ccol,
00387       std::vector<int>& Cremotepids,
00388       Epetra_CrsMatrix& C
00389 ){
00390 #ifdef ENABLE_MMM_TIMINGS
00391   using Teuchos::TimeMonitor;
00392   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix SerialCore")));
00393 #endif
00394 
00395   // *****************************
00396   // Improved Parallel Gustavson in Local IDs
00397   // *****************************
00398   const Epetra_Map * colmap_C = &(C.ColMap());
00399 
00400 
00401   int NumMyDiagonals=0; // Counter to speed up ESFC
00402 
00403   int m=A.NumMyRows();
00404   int n=colmap_C->NumMyElements();
00405   int i,j,k;
00406 
00407   // DataPointers for A
00408   int *Arowptr, *Acolind;
00409   double *Avals;
00410   EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
00411 
00412   // DataPointers for B, Bimport
00413   int *Browptr, *Bcolind;
00414   double *Bvals;
00415   EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
00416 
00417   int *Irowptr=0, *Icolind=0;
00418   double *Ivals=0;
00419   if(Bview.importMatrix){
00420     Irowptr = &Bview.importMatrix->rowptr_[0];
00421     Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
00422     Ivals   = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
00423   }
00424 
00425   // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
00426   // This is needed because I'm about to walk all over the CrsGrapData...
00427   C.ExpertMakeUniqueCrsGraphData();
00428 
00429   // The status array will contain the index into colind where this entry was last deposited.
00430   // c_status[i] < CSR_ip - not in the row yet.
00431   // c_status[i] >= CSR_ip, this is the entry where you can find the data
00432   // We start with this filled with -1's indicating that there are no entries yet.
00433   std::vector<int> c_status(n, -1);
00434 
00435   // Classic csr assembly (low memory edition)
00436   int CSR_alloc=C_estimate_nnz(A,B);
00437   if(CSR_alloc < n) CSR_alloc = n;
00438   int CSR_ip=0,OLD_ip=0;
00439   Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
00440   Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();  
00441   double *&                     CSR_vals   = C.ExpertExtractValues();
00442 
00443   CSR_rowptr.Resize(m+1);
00444   CSR_colind.Resize(CSR_alloc);
00445   resize_doubles(0,CSR_alloc,CSR_vals);
00446 
00447   // Static Profile stuff
00448   std::vector<int> NumEntriesPerRow(m);
00449 
00450   // For each row of A/C
00451   for(i=0; i<m; i++){            
00452     bool found_diagonal=false;
00453     CSR_rowptr[i]=CSR_ip;
00454 
00455     for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
00456       int Ak      = Acolind[k];
00457       double Aval = Avals[k];
00458       if(Aval==0) continue;
00459       
00460       if(Bview.targetMapToOrigRow[Ak] != -1){
00461   // Local matrix
00462   int Bk = Bview.targetMapToOrigRow[Ak];
00463   for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
00464     int Cj=Bcol2Ccol[Bcolind[j]];
00465 
00466     if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
00467 
00468     if(c_status[Cj]<OLD_ip){
00469       // New entry
00470       c_status[Cj]=CSR_ip;
00471       CSR_colind[CSR_ip]=Cj;
00472       CSR_vals[CSR_ip]=Aval*Bvals[j];
00473       CSR_ip++;
00474     }
00475     else
00476       CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
00477   }
00478       }
00479       else{
00480   // Remote matrix
00481   int Ik = Bview.targetMapToImportRow[Ak];
00482   for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
00483     int Cj=Bimportcol2Ccol[Icolind[j]];
00484     
00485     if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
00486 
00487     if(c_status[Cj]<OLD_ip){
00488       // New entry
00489       c_status[Cj]=CSR_ip;
00490       CSR_colind[CSR_ip]=Cj;
00491       CSR_vals[CSR_ip]=Aval*Ivals[j];
00492       CSR_ip++;
00493     }
00494     else
00495       CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
00496   }
00497       }
00498     }
00499     NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
00500 
00501     // Resize for next pass if needed
00502     if(CSR_ip + n > CSR_alloc){
00503       resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
00504       CSR_alloc*=2;
00505       CSR_colind.Resize(CSR_alloc);
00506     }
00507     OLD_ip=CSR_ip;
00508   }
00509 
00510   CSR_rowptr[m]=CSR_ip;
00511 
00512 #ifdef ENABLE_MMM_TIMINGS
00513   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Final Sort")));
00514 #endif
00515 
00516   // Sort the entries
00517   Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
00518 
00519 #ifdef ENABLE_MMM_TIMINGS
00520   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Fast IE")));
00521 #endif
00522 
00523   // Do a fast build of C's importer
00524   Epetra_Import * Cimport=0; 
00525   int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
00526   int NumExports=0;
00527   int *ExportLIDs=0, *ExportPIDs=0;
00528   if(Bview.importMatrix) { 
00529     NumExports = Bview.importMatrix->ExportLIDs_.size();
00530     ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
00531     ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
00532   }
00533   else if(B.Importer()) {
00534     // Grab the exports from B proper
00535     NumExports = B.Importer()->NumExportIDs();
00536     ExportLIDs = B.Importer()->ExportLIDs();
00537     ExportPIDs = B.Importer()->ExportPIDs();    
00538   }
00539 
00540 
00541   if(B.Importer() && C.ColMap().SameAs(B.ColMap())) 
00542     Cimport = new Epetra_Import(*B.Importer()); // Because the domain maps are the same
00543   else if(!C.ColMap().SameAs(B.DomainMap()))
00544     Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
00545 
00546 
00547 #ifdef ENABLE_MMM_TIMINGS
00548   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix ESFC")));
00549 #endif
00550 
00551   // Update the CrsGraphData
00552   C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
00553 
00554   return 0;
00555 }
00556 
00557 
00558 
00559 
00560 /*****************************************************************************/
00561 /*****************************************************************************/
00562 /*****************************************************************************/
00563 template<typename int_type>
00564 int mult_A_B_reuse(const Epetra_CrsMatrix & A,
00565        const Epetra_CrsMatrix & B,
00566        CrsMatrixStruct& Bview,
00567        std::vector<int> & Bcol2Ccol,
00568        std::vector<int> & Bimportcol2Ccol,
00569        Epetra_CrsMatrix& C){
00570 
00571 #ifdef ENABLE_MMM_TIMINGS
00572   using Teuchos::TimeMonitor;
00573   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse SerialCore")));
00574 #endif
00575 
00576   // *****************************
00577   // Improved Parallel Gustavson in Local IDs
00578   // *****************************
00579   const Epetra_Map * colmap_C = &(C.ColMap());
00580 
00581   int m=A.NumMyRows();
00582   int n=colmap_C->NumMyElements();
00583   int i,j,k;
00584 
00585   // DataPointers for A
00586   int *Arowptr, *Acolind;
00587   double *Avals;
00588   EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
00589 
00590   // DataPointers for B, Bimport
00591   int *Browptr, *Bcolind;
00592   double *Bvals;
00593   EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
00594 
00595   int *Irowptr=0, *Icolind=0;
00596   double *Ivals=0;
00597   if(Bview.importMatrix){
00598     Irowptr = &Bview.importMatrix->rowptr_[0];
00599     Icolind = &Bview.importMatrix->colind_[0];
00600     Ivals   = &Bview.importMatrix->vals_[0];
00601   }
00602 
00603   // DataPointers for C
00604   int *CSR_rowptr, *CSR_colind;
00605   double *CSR_vals;
00606   EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
00607 
00608 
00609   // The status array will contain the index into colind where this dude was last deposited.
00610   // c_status[i] < CSR_ip - not in the row yet.
00611   // c_status[i] >= CSR_ip, this is the entry where you can find the data
00612   // We start with this filled with -1's indicating that there are no entries yet.
00613   std::vector<int> c_status(n, -1);
00614 
00615   // Classic csr assembly
00616   int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
00617   int CSR_ip=0,OLD_ip=0;
00618 
00619  // For each row of A/C
00620   for(i=0; i<m; i++){            
00621     for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
00622       int Ak=Acolind[k];
00623       double Aval = Avals[k];
00624       if(Aval==0) continue;
00625 
00626       if(Bview.targetMapToOrigRow[Ak] != -1){
00627   // Local matrix
00628   int Bk = Bview.targetMapToOrigRow[Ak];
00629   for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
00630     int Cj=Bcol2Ccol[Bcolind[j]];
00631 
00632     if(c_status[Cj]<OLD_ip){
00633       // New entry
00634       if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
00635       c_status[Cj]=CSR_ip;
00636       CSR_colind[CSR_ip]=Cj;
00637       CSR_vals[CSR_ip]=Aval*Bvals[j];
00638       CSR_ip++;
00639     }
00640     else
00641       CSR_vals[c_status[Cj]]+=Aval*Bvals[j];
00642   }
00643       }
00644       else{
00645   // Remote matrix
00646   int Ik = Bview.targetMapToImportRow[Ak];
00647   for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
00648     int Cj=Bimportcol2Ccol[Icolind[j]];
00649 
00650     if(c_status[Cj]<OLD_ip){
00651       // New entry
00652       if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
00653       c_status[Cj]=CSR_ip;
00654       CSR_colind[CSR_ip]=Cj;
00655       CSR_vals[CSR_ip]=Aval*Ivals[j];
00656       CSR_ip++;
00657     }
00658     else
00659       CSR_vals[c_status[Cj]]+=Aval*Ivals[j];
00660   }
00661       }
00662     }
00663     OLD_ip=CSR_ip;
00664   }
00665 
00666 #ifdef ENABLE_MMM_TIMINGS
00667   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Final Sort")));
00668 #endif
00669 
00670   // Sort the entries
00671   Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
00672 
00673   return 0;
00674 }
00675 
00676 
00677 
00678 /*****************************************************************************/
00679 /*****************************************************************************/
00680 /*****************************************************************************/
00681 //kernel method for computing the local portion of C = A*B
00682 template<typename int_type>
00683   int mult_A_B_general(const Epetra_CrsMatrix & A,
00684            CrsMatrixStruct & Aview,
00685            const Epetra_CrsMatrix & B,
00686            CrsMatrixStruct& Bview,
00687            Epetra_CrsMatrix& C,
00688            bool call_FillComplete_on_result)
00689 {
00690   int C_firstCol = Bview.colMap->MinLID();
00691   int C_lastCol = Bview.colMap->MaxLID();
00692 
00693   int C_firstCol_import = 0;
00694   int C_lastCol_import = -1;
00695 
00696   int_type* bcols = 0;
00697   Bview.colMap->MyGlobalElementsPtr(bcols);
00698   int_type* bcols_import = NULL;
00699   if (Bview.importMatrix != NULL) {
00700     C_firstCol_import = Bview.importMatrix->ColMap_.MinLID();
00701     C_lastCol_import = Bview.importMatrix->ColMap_.MaxLID();
00702     Bview.importMatrix->ColMap_.MyGlobalElementsPtr(bcols_import);
00703   }
00704 
00705   int C_numCols = C_lastCol - C_firstCol + 1;
00706   int C_numCols_import = C_lastCol_import - C_firstCol_import + 1;
00707 
00708   if (C_numCols_import > C_numCols) C_numCols = C_numCols_import;
00709 
00710   // Allocate workspace memory
00711   double* dwork = new double[C_numCols];
00712   int_type* iwork = new int_type[C_numCols];
00713   int_type *c_cols=iwork;
00714   double *c_vals=dwork;
00715   int *c_index=new int[C_numCols];
00716 
00717   int  i, j, k;
00718   bool C_filled=C.Filled();
00719 
00720   // DataPointers for A
00721   int *Arowptr, *Acolind;
00722   double *Avals;
00723   EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
00724 
00725   //To form C = A*B we're going to execute this expression:
00726   //
00727   // C(i,j) = sum_k( A(i,k)*B(k,j) )
00728   //
00729   //Our goal, of course, is to navigate the data in A and B once, without
00730   //performing searches for column-indices, etc.
00731   
00732   // Mark indices as empty w/ -1
00733   for(k=0;k<C_numCols;k++) c_index[k]=-1;
00734 
00735   //loop over the rows of A.
00736   for(i=0; i<A.NumMyRows(); ++i) {
00737 
00738     int_type global_row = (int_type) Aview.rowMap->GID64(i);
00739 
00740     //loop across the i-th row of A and for each corresponding row
00741     //in B, loop across colums and accumulate product
00742     //A(i,k)*B(k,j) into our partial sum quantities C_row_i. In other words,
00743     //as we stride across B(k,:) we're calculating updates for row i of the
00744     //result matrix C.
00745 
00746     /* Outline of the revised, ML-inspired algorithm
00747  
00748     C_{i,j} = \sum_k A_{i,k} B_{k,j}
00749 
00750     This algorithm uses a "middle product" formulation, with the loop ordering of 
00751     i, k, j.  This means we compute a row of C at a time, but compute partial sums of 
00752     each entry in row i until we finish the k loop.
00753 
00754     This algorithm also has a few twists worth documenting.  
00755 
00756     1) The first major twist involves the c_index, c_cols and c_vals arrays.  The arrays c_cols 
00757     and c_vals store the *local* column index and values accumulator respectively.  These 
00758     arrays are allocated to a size equal to the max number of local columns in C, namely C_numcols.
00759     The value c_current tells us how many non-zeros we currently have in this row.
00760     
00761     So how do we take a LCID and find the right accumulator?  This is where the c_index array
00762     comes in.  At the start (and stop) and the i loop, c_index is filled with -1's.  Now 
00763     whenever we find a LCID in the k loop, we first loop at c_index[lcid].  If this value is
00764     -1 we haven't seen this entry yet.  In which case we add the appropriate stuff to c_cols
00765     and c_vals and then set c_index[lcid] to the location of the accumulator (c_current before
00766     we increment it).  If the value is NOT -1, this tells us the location in the c_vals/c_cols
00767     arrays (namely c_index[lcid]) where our accumulator lives.
00768 
00769     This trick works because we're working with local ids.  We can then loop from 0 to c_current
00770     and reset c_index to -1's when we're done, only touching the arrays that have changed.
00771     While we're at it, we can switch to the global ids so we can call [Insert|SumInto]GlobalValues.
00772     Thus, the effect of this trick is to avoid passes over the index array.
00773 
00774     2) The second major twist involves handling the remote and local components of B separately.
00775     (ML doesn't need to do this, because its local ordering scheme is consistent between the local
00776     and imported components of B.)  Since they have different column maps, they have inconsistent 
00777     local column ids.  This means the "second twist" won't work as stated on both matrices at the 
00778     same time.  While this could be handled any number of ways, I have chosen to do the two parts 
00779     of B separately to make the code easier to read (and reduce the memory footprint of the MMM).
00780     */
00781 
00782     // Local matrix: Zero Current counts for matrix
00783     int c_current=0;    
00784 
00785     // Local matrix: Do the "middle product"
00786     for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
00787       int Ak      = Acolind[k];
00788       double Aval = Avals[k];
00789       // We're skipping remote entries on this pass.
00790       if(Bview.remote[Ak] || Aval==0) continue;
00791       
00792       int* Bcol_inds = Bview.indices[Ak];
00793       double* Bvals_k = Bview.values[Ak];
00794 
00795       for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00796   int col=Bcol_inds[j];
00797   if(c_index[col]<0){
00798     // We haven't seen this entry before; add it.  (In ML, on
00799     // the first pass, you haven't seen any of the entries
00800     // before, so they are added without the check.  Not sure
00801     // how much more efficient that would be; depends on branch
00802     // prediction.  We've favored code readability here.)
00803     c_cols[c_current]=col;        
00804     c_vals[c_current]=Aval*Bvals_k[j];
00805     c_index[col]=c_current;
00806     c_current++;
00807   }
00808   else{ 
00809     // We've already seen this entry; accumulate it.
00810     c_vals[c_index[col]]+=Aval*Bvals_k[j];      
00811   }
00812       }
00813     }    
00814     // Local matrix: Reset c_index and switch c_cols to GIDs
00815     for(k=0; k<c_current; k++){
00816       c_index[c_cols[k]]=-1;
00817       c_cols[k]=bcols[c_cols[k]]; // Switch from local to global IDs.     
00818     }
00819     // Local matrix: Insert.
00820     //
00821     // We should check global error results after the algorithm is
00822     // through.  It's probably safer just to let the algorithm run all
00823     // the way through before doing this, since otherwise we have to
00824     // remember to free all allocations carefully.
00825     int err = C_filled ?
00826       C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
00827       :
00828       C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);   
00829 
00830     // Remote matrix: Zero current counts again for matrix
00831     c_current=0;    
00832 
00833     // Remote matrix: Do the "middle product"
00834     for(k=Arowptr[i]; k<Arowptr[i+1]; ++k) {
00835       int Ak      = Acolind[k];
00836       double Aval = Avals[k];
00837       // We're skipping local entries on this pass.
00838       if(!Bview.remote[Ak] || Aval==0) continue;
00839       
00840       int* Bcol_inds = Bview.indices[Ak];
00841       double* Bvals_k = Bview.values[Ak];
00842 
00843       for(j=0; j<Bview.numEntriesPerRow[Ak]; ++j) {
00844   int col=Bcol_inds[j];
00845   if(c_index[col]<0){
00846     c_cols[c_current]=col;        
00847     c_vals[c_current]=Aval*Bvals_k[j];
00848     c_index[col]=c_current;
00849     c_current++;
00850   }
00851   else{
00852     c_vals[c_index[col]]+=Aval*Bvals_k[j];      
00853   }
00854       }
00855     }    
00856     // Remote matrix: Reset c_index and switch c_cols to GIDs
00857     for(k=0; k<c_current; k++){
00858       c_index[c_cols[k]]=-1;
00859       c_cols[k]=bcols_import[c_cols[k]];      
00860     }
00861     // Remove matrix: Insert
00862     //
00863     // See above (on error handling).
00864     err = C_filled ?
00865       C.SumIntoGlobalValues(global_row,c_current,c_vals,c_cols)
00866       :
00867       C.InsertGlobalValues(global_row,c_current,c_vals,c_cols);    
00868   }
00869   
00870   // Since Multiply won't do this
00871   if(call_FillComplete_on_result)
00872     C.FillComplete(B.DomainMap(),A.RangeMap());
00873 
00874   delete [] dwork;
00875   delete [] iwork;
00876   delete [] c_index;
00877   return(0);
00878 }
00879 
00880 
00881 
00882 
00883 
00884 /*****************************************************************************/
00885 /*****************************************************************************/
00886 /*****************************************************************************/
00887 template<typename int_type>
00888 int MatrixMatrix::Tmult_A_B(const Epetra_CrsMatrix & A,
00889          CrsMatrixStruct & Aview,
00890          const Epetra_CrsMatrix & B,
00891          CrsMatrixStruct& Bview,
00892          Epetra_CrsMatrix& C,
00893          bool call_FillComplete_on_result){
00894 
00895   int i,rv;
00896   Epetra_Map* mapunion = 0;
00897   const Epetra_Map * colmap_B = &(B.ColMap());
00898   const Epetra_Map * colmap_C = &(C.ColMap());
00899 
00900   std::vector<int> Cremotepids;
00901   std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
00902   std::vector<int> Bimportcol2Ccol;
00903   if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
00904 
00905 #ifdef ENABLE_MMM_TIMINGS
00906   using Teuchos::TimeMonitor;
00907   Teuchos::RCP<Teuchos::TimeMonitor> MM;
00908 #endif  
00909 
00910   // If the user doesn't want us to call FillComplete, use the general routine
00911   if(!call_FillComplete_on_result) {
00912 #ifdef ENABLE_MMM_TIMINGS
00913     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
00914 #endif
00915     rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
00916     return rv;
00917   }
00918 
00919   // Is this a "clean" matrix
00920   bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
00921 
00922   // Does ExtractCrsDataPointers work?
00923   int *C_rowptr, *C_colind;
00924   double * C_vals;
00925   C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
00926   bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
00927 
00928   // It's a new matrix that hasn't been fill-completed, use the general routine
00929   if(!NewFlag && ExtractFailFlag){
00930 #ifdef ENABLE_MMM_TIMINGS
00931     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM General Multiply")));
00932 #endif
00933     rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
00934     return rv;
00935   }
00936 
00937 
00938 #ifdef ENABLE_MMM_TIMINGS
00939   if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix CMap")));
00940   else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse CMap")));
00941 #endif
00942 
00943   // If new, build & clobber a colmap for C
00944   if(NewFlag){
00945     if(Bview.importMatrix) {
00946       EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
00947       EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
00948     }
00949     else  {
00950       EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
00951       for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
00952       
00953       // Copy B's remote list (if any)
00954       if(B.Importer()) 
00955   EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
00956     }
00957   }
00958 
00959 #ifdef ENABLE_MMM_TIMINGS
00960   if(NewFlag)  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Newmatrix Lookups")));
00961   else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM Reuse Lookups")));
00962 #endif
00963 
00964   // ********************************************
00965   // Setup Bcol2Ccol / Bimportcol2Ccol lookups
00966   // ********************************************
00967   // Note: If we ran the map_union, we have this information already
00968 
00969   if(!NewFlag) {
00970     if(colmap_B->SameAs(*colmap_C)){
00971       // Maps are the same: Use local IDs as the hash
00972       for(i=0;i<colmap_B->NumMyElements();i++)
00973   Bcol2Ccol[i]=i;       
00974     }
00975     else {
00976       // Maps are not the same:  Use the map's hash
00977       for(i=0;i<colmap_B->NumMyElements();i++){
00978   Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
00979   if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
00980       }
00981     }
00982     
00983     if(Bview.importMatrix){
00984       Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
00985       for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
00986       Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
00987       if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
00988       }
00989       
00990     }
00991   }
00992 #ifdef ENABLE_MMM_TIMINGS
00993   MM=Teuchos::null;
00994 #endif
00995 
00996   // Call the appropriate core routine
00997   if(NewFlag) {
00998     EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
00999   }
01000   else {
01001     // This always has a real map
01002     EPETRA_CHK_ERR(mult_A_B_reuse<int_type>(A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
01003   }
01004 
01005   // Cleanup      
01006   delete mapunion;
01007   return 0;
01008 }
01009 
01010 
01011 /*****************************************************************************/
01012 /*****************************************************************************/
01013 /*****************************************************************************/
01014 int MatrixMatrix::mult_A_B(const Epetra_CrsMatrix & A,
01015          CrsMatrixStruct & Aview,
01016          const Epetra_CrsMatrix & B,
01017          CrsMatrixStruct& Bview,
01018          Epetra_CrsMatrix& C,
01019          bool call_FillComplete_on_result){
01020 
01021 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01022   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01023   return Tmult_A_B<int>(A, Aview, B, Bview, C, call_FillComplete_on_result);
01024   }
01025   else
01026 #endif
01027 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01028   if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01029   return Tmult_A_B<long long>(A, Aview, B, Bview, C, call_FillComplete_on_result);
01030   }
01031   else
01032 #endif
01033     throw "EpetraExt::MatrixMatrix::mult_A_B: GlobalIndices type unknown";
01034 }
01035 
01036 
01037 /*****************************************************************************/
01038 /*****************************************************************************/
01039 /*****************************************************************************/
01040 template<typename int_type>
01041 int jacobi_A_B_reuse(double omega,
01042          const Epetra_Vector & Dinv,
01043          const Epetra_CrsMatrix & A,
01044          const Epetra_CrsMatrix & B,
01045          CrsMatrixStruct& Bview,
01046          std::vector<int> & Bcol2Ccol,
01047          std::vector<int> & Bimportcol2Ccol,
01048          Epetra_CrsMatrix& C){
01049 
01050 #ifdef ENABLE_MMM_TIMINGS
01051   using Teuchos::TimeMonitor;
01052   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse SerialCore")));
01053 #endif
01054 
01055   // *****************************
01056   // Improved Parallel Gustavson in Local IDs
01057   // *****************************
01058   const Epetra_Map * colmap_C = &(C.ColMap());
01059 
01060   int m=A.NumMyRows();
01061   int n=colmap_C->NumMyElements();
01062   int i,j,k;
01063 
01064   // DataPointers for A
01065   int *Arowptr, *Acolind;
01066   double *Avals;
01067   EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
01068 
01069   // DataPointers for B, Bimport
01070   int *Browptr, *Bcolind;
01071   double *Bvals;
01072   EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
01073 
01074   int *Irowptr=0, *Icolind=0;
01075   double *Ivals=0;
01076   if(Bview.importMatrix){
01077     Irowptr = &Bview.importMatrix->rowptr_[0];
01078     Icolind = &Bview.importMatrix->colind_[0];
01079     Ivals   = &Bview.importMatrix->vals_[0];
01080   }
01081 
01082   // Data pointer for Dinv
01083   const double *Dvals = Dinv.Values();
01084 
01085   // DataPointers for C
01086   int *CSR_rowptr, *CSR_colind;
01087   double *CSR_vals;
01088   EPETRA_CHK_ERR(C.ExtractCrsDataPointers(CSR_rowptr,CSR_colind,CSR_vals));
01089 
01090 
01091   // The status array will contain the index into colind where this dude was last deposited.
01092   // c_status[i] < CSR_ip - not in the row yet.
01093   // c_status[i] >= CSR_ip, this is the entry where you can find the data
01094   // We start with this filled with -1's indicating that there are no entries yet.
01095   std::vector<int> c_status(n, -1);
01096 
01097   // Classic csr assembly
01098   int CSR_alloc=CSR_rowptr[m] - CSR_rowptr[0];
01099   int CSR_ip=0,OLD_ip=0;
01100 
01101   // For each row of C
01102   for(i=0; i<m; i++){ 
01103     double Dval = Dvals[i];
01104 
01105     // Entries of B
01106     for(k=Browptr[i]; k<Browptr[i+1]; k++){
01107       //      int Bk      = Bcolind[k];
01108       double Bval = Bvals[k];
01109       if(Bval==0) continue;
01110       int Ck=Bcol2Ccol[Bcolind[k]];
01111       
01112       // Assume no repeated entries in B
01113       c_status[Ck]=CSR_ip;
01114       CSR_colind[CSR_ip]=Ck;
01115       CSR_vals[CSR_ip]= Bvals[k];
01116       CSR_ip++;
01117     }
01118 
01119     // Entries of -omega * Dinv * A * B          
01120     for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
01121       int Ak=Acolind[k];
01122       double Aval = Avals[k];
01123       if(Aval==0) continue;
01124 
01125       if(Bview.targetMapToOrigRow[Ak] != -1){
01126   // Local matrix
01127   int Bk = Bview.targetMapToOrigRow[Ak];
01128   for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
01129     int Cj=Bcol2Ccol[Bcolind[j]];
01130 
01131     if(c_status[Cj]<OLD_ip){
01132       // New entry
01133       if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-13);
01134       c_status[Cj]=CSR_ip;
01135       CSR_colind[CSR_ip]=Cj;
01136       CSR_vals[CSR_ip]= - omega * Dval * Aval * Bvals[j];
01137       CSR_ip++;
01138     }
01139     else
01140       CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
01141   }
01142       }
01143       else{
01144   // Remote matrix
01145   int Ik = Bview.targetMapToImportRow[Ak];
01146   for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
01147     int Cj=Bimportcol2Ccol[Icolind[j]];
01148 
01149     if(c_status[Cj]<OLD_ip){
01150       // New entry
01151       if(CSR_ip >= CSR_alloc) EPETRA_CHK_ERR(-14);
01152       c_status[Cj]=CSR_ip;
01153       CSR_colind[CSR_ip]=Cj;
01154       CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
01155       CSR_ip++;
01156     }
01157     else
01158       CSR_vals[c_status[Cj]]-=omega * Dval * Aval * Ivals[j];
01159   }
01160       }
01161     }
01162     OLD_ip=CSR_ip;
01163   }
01164 
01165 #ifdef ENABLE_MMM_TIMINGS
01166   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Final Sort")));
01167 #endif
01168   // Sort the entries
01169   Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
01170 
01171   return 0;
01172 }
01173 
01174 
01175 /*****************************************************************************/
01176 /*****************************************************************************/
01177 /*****************************************************************************/
01178 template<typename int_type>
01179 int jacobi_A_B_newmatrix(double omega,
01180        const Epetra_Vector & Dinv,
01181        const Epetra_CrsMatrix & A,
01182        const Epetra_CrsMatrix & B,
01183        CrsMatrixStruct& Bview,
01184        std::vector<int> & Bcol2Ccol,
01185        std::vector<int> & Bimportcol2Ccol,
01186        std::vector<int>& Cremotepids,
01187        Epetra_CrsMatrix& C)
01188 {
01189 #ifdef ENABLE_MMM_TIMINGS
01190   using Teuchos::TimeMonitor;
01191   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix SerialCore")));
01192 #endif
01193 
01194   // *****************************
01195   // Improved Parallel Gustavson in Local IDs
01196   // *****************************
01197   const Epetra_Map * colmap_C = &(C.ColMap());
01198   int NumMyDiagonals=0; // Counter to speed up ESFC
01199 
01200   int m=A.NumMyRows();
01201   int n=colmap_C->NumMyElements();
01202   int i,j,k;
01203 
01204   // DataPointers for A
01205   int *Arowptr, *Acolind;
01206   double *Avals;
01207   EPETRA_CHK_ERR(A.ExtractCrsDataPointers(Arowptr,Acolind,Avals));
01208 
01209   // DataPointers for B, Bimport
01210   int *Browptr, *Bcolind;
01211   double *Bvals;
01212   EPETRA_CHK_ERR(B.ExtractCrsDataPointers(Browptr,Bcolind,Bvals));
01213 
01214   // Data pointer for Dinv
01215   const double *Dvals = Dinv.Values();
01216 
01217   int *Irowptr=0, *Icolind=0;
01218   double *Ivals=0;
01219   if(Bview.importMatrix){
01220     Irowptr = &Bview.importMatrix->rowptr_[0];
01221     Icolind = (Bview.importMatrix->colind_.size()>0)?(&Bview.importMatrix->colind_[0]):0;
01222     Ivals   = (Bview.importMatrix->vals_.size()>0)?(&Bview.importMatrix->vals_[0]):0;
01223   }
01224 
01225   // MemorySetup: If somebody else is sharing this C's graphdata, make a new one.
01226   // This is needed because I'm about to walk all over the CrsGrapData...
01227   C.ExpertMakeUniqueCrsGraphData();
01228 
01229   // The status array will contain the index into colind where this entry was last deposited.
01230   // c_status[i] < CSR_ip - not in the row yet.
01231   // c_status[i] >= CSR_ip, this is the entry where you can find the data
01232   // We start with this filled with -1's indicating that there are no entries yet.
01233   std::vector<int> c_status(n, -1);
01234 
01235   // Classic csr assembly (low memory edition)
01236   int CSR_alloc=C_estimate_nnz(A,B);
01237   if(CSR_alloc < B.NumMyNonzeros()) CSR_alloc = B.NumMyNonzeros(); // update for Jacobi
01238   int CSR_ip=0,OLD_ip=0;
01239   Epetra_IntSerialDenseVector & CSR_rowptr = C.ExpertExtractIndexOffset();
01240   Epetra_IntSerialDenseVector & CSR_colind = C.ExpertExtractIndices();  
01241   double *&                     CSR_vals   = C.ExpertExtractValues();
01242 
01243   CSR_rowptr.Resize(m+1);
01244   CSR_colind.Resize(CSR_alloc);
01245   resize_doubles(0,CSR_alloc,CSR_vals);
01246 
01247   // Static Profile stuff
01248   std::vector<int> NumEntriesPerRow(m);
01249 
01250   // For each row of C
01251   for(i=0; i<m; i++){
01252     bool found_diagonal=false;
01253     CSR_rowptr[i]=CSR_ip;
01254     double Dval = Dvals[i];
01255 
01256     // Entries of B
01257     for(k=Browptr[i]; k<Browptr[i+1]; k++){
01258       //int Bk      = Bcolind[k];
01259       double Bval = Bvals[k];
01260       if(Bval==0) continue;
01261       int Ck=Bcol2Ccol[Bcolind[k]];
01262       
01263       // Assume no repeated entries in B
01264       c_status[Ck]=CSR_ip;
01265       CSR_colind[CSR_ip]=Ck;
01266       CSR_vals[CSR_ip]= Bvals[k];
01267       CSR_ip++;
01268     }
01269 
01270     // Entries of -omega * Dinv * A * B
01271     for(k=Arowptr[i]; k<Arowptr[i+1]; k++){
01272       int Ak      = Acolind[k];
01273       double Aval = Avals[k];
01274       if(Aval==0) continue;
01275       
01276       if(Bview.targetMapToOrigRow[Ak] != -1){
01277   // Local matrix
01278   int Bk = Bview.targetMapToOrigRow[Ak];
01279   for(j=Browptr[Bk]; j<Browptr[Bk+1]; ++j) {
01280     int Cj=Bcol2Ccol[Bcolind[j]];
01281 
01282     if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
01283 
01284     if(c_status[Cj]<OLD_ip){
01285       // New entry
01286       c_status[Cj]=CSR_ip;
01287       CSR_colind[CSR_ip]=Cj;
01288       CSR_vals[CSR_ip]= - omega * Dval* Aval * Bvals[j];
01289       CSR_ip++;
01290     }
01291     else
01292       CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Bvals[j];
01293   }
01294       }
01295       else{
01296   // Remote matrix
01297   int Ik = Bview.targetMapToImportRow[Ak];
01298   for(j=Irowptr[Ik]; j<Irowptr[Ik+1]; ++j) {
01299     int Cj=Bimportcol2Ccol[Icolind[j]];
01300     
01301     if(Cj==i && !found_diagonal) {found_diagonal=true; NumMyDiagonals++;}
01302 
01303     if(c_status[Cj]<OLD_ip){
01304       // New entry
01305       c_status[Cj]=CSR_ip;
01306       CSR_colind[CSR_ip]=Cj;
01307       CSR_vals[CSR_ip]= - omega * Dval * Aval * Ivals[j];
01308       CSR_ip++;
01309     }
01310     else
01311       CSR_vals[c_status[Cj]]-= omega * Dval * Aval * Ivals[j];
01312   }
01313       }
01314     }
01315     NumEntriesPerRow[i]=CSR_ip-CSR_rowptr[i];
01316 
01317     // Resize for next pass if needed
01318     if(CSR_ip + n > CSR_alloc){
01319       resize_doubles(CSR_alloc,2*CSR_alloc,CSR_vals);
01320       CSR_alloc*=2;
01321       CSR_colind.Resize(CSR_alloc);
01322     }
01323     OLD_ip=CSR_ip;
01324   }
01325 
01326   CSR_rowptr[m]=CSR_ip;
01327 
01328 #ifdef ENABLE_MMM_TIMINGS
01329   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Final Sort")));
01330 #endif
01331 
01332   // Sort the entries
01333   Epetra_Util::SortCrsEntries(m, &CSR_rowptr[0], &CSR_colind[0], &CSR_vals[0]);
01334 
01335 #ifdef ENABLE_MMM_TIMINGS
01336   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix Fast IE")));
01337 #endif
01338 
01339   // Do a fast build of C's importer
01340   Epetra_Import * Cimport=0; 
01341   int *RemotePIDs = Cremotepids.size()?&Cremotepids[0]:0;
01342   int NumExports=0;
01343   int *ExportLIDs=0, *ExportPIDs=0;
01344   if(Bview.importMatrix) { 
01345     NumExports = Bview.importMatrix->ExportLIDs_.size();
01346     ExportLIDs = Bview.importMatrix->ExportLIDs_.size()?&Bview.importMatrix->ExportLIDs_[0]:0;
01347     ExportPIDs = Bview.importMatrix->ExportPIDs_.size()?&Bview.importMatrix->ExportPIDs_[0]:0;
01348   }
01349   else if(B.Importer()) {
01350     // Grab the exports from B proper
01351     NumExports = B.Importer()->NumExportIDs();
01352     ExportLIDs = B.Importer()->ExportLIDs();
01353     ExportPIDs = B.Importer()->ExportPIDs();    
01354   }
01355 
01356   if(!C.ColMap().SameAs(B.DomainMap()))
01357     Cimport = new Epetra_Import(C.ColMap(),B.DomainMap(),Cremotepids.size(),RemotePIDs,NumExports,ExportLIDs,ExportPIDs);
01358 
01359 #ifdef ENABLE_MMM_TIMINGS
01360   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi NewMatrix ESFC")));
01361 #endif
01362 
01363   // Update the CrsGraphData
01364   C.ExpertStaticFillComplete(B.DomainMap(),A.RangeMap(),Cimport,0,NumMyDiagonals);
01365 
01366   return 0;
01367 }
01368 
01369 
01370 
01371 /*****************************************************************************/
01372 /*****************************************************************************/
01373 /*****************************************************************************/
01374 template<typename int_type>
01375 int MatrixMatrix::Tjacobi_A_B(double omega,
01376            const Epetra_Vector & Dinv,
01377            const Epetra_CrsMatrix & A,
01378            CrsMatrixStruct & Aview,
01379            const Epetra_CrsMatrix & B,
01380            CrsMatrixStruct& Bview,
01381            Epetra_CrsMatrix& C,
01382             bool call_FillComplete_on_result){
01383   int i,rv;
01384   Epetra_Map* mapunion = 0;
01385   const Epetra_Map * colmap_B = &(B.ColMap());
01386   const Epetra_Map * colmap_C = &(C.ColMap());
01387 
01388   std::vector<int> Cremotepids;
01389   std::vector<int> Bcol2Ccol(B.ColMap().NumMyElements());
01390   std::vector<int> Bimportcol2Ccol;
01391   if(Bview.importMatrix) Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
01392 
01393 #ifdef ENABLE_MMM_TIMINGS
01394   using Teuchos::TimeMonitor;
01395   Teuchos::RCP<Teuchos::TimeMonitor> MM;
01396 #endif  
01397 
01398   // If the user doesn't want us to call FillComplete, use the general routine
01399   if(!call_FillComplete_on_result) {
01400 #ifdef ENABLE_MMM_TIMINGS
01401     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
01402 #endif
01403     throw std::runtime_error("jacobi_A_B_general not implemented");
01404     //    rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,false);
01405     return rv;
01406   }
01407 
01408   // Is this a "clean" matrix
01409   bool NewFlag=!C.IndicesAreLocal() && !C.IndicesAreGlobal();
01410 
01411   // Does ExtractCrsDataPointers work?
01412   int *C_rowptr, *C_colind;
01413   double * C_vals;
01414   C.ExtractCrsDataPointers(C_rowptr,C_colind,C_vals);
01415   bool ExtractFailFlag=!C_rowptr || !C_colind || !C_vals;
01416 
01417   // It's a new matrix that hasn't been fill-completed, use the general routine
01418   if(!NewFlag && ExtractFailFlag){
01419 #ifdef ENABLE_MMM_TIMINGS
01420     MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi General Multiply")));
01421 #endif
01422     throw std::runtime_error("jacobi_A_B_general not implemented");
01423     //    rv=mult_A_B_general<int_type>(A,Aview,B,Bview,C,call_FillComplete_on_result);
01424     return rv;
01425   }
01426 
01427 #ifdef ENABLE_MMM_TIMINGS
01428   if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix CMap")));
01429   else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse CMap")));
01430 #endif
01431 
01432   // If new, build & clobber a colmap for C
01433   if(NewFlag){
01434     if(Bview.importMatrix) {
01435       EPETRA_CHK_ERR( aztecoo_and_ml_compatible_map_union<int_type>(B,*Bview.importMatrix,mapunion,Cremotepids,Bcol2Ccol,Bimportcol2Ccol) );
01436       EPETRA_CHK_ERR( C.ReplaceColMap(*mapunion) );
01437     }
01438     else  {
01439       EPETRA_CHK_ERR( C.ReplaceColMap(B.ColMap()) );
01440       for(i=0;i<colmap_B->NumMyElements();i++) Bcol2Ccol[i]=i;
01441       
01442       // Copy B's remote list (if any)
01443       if(B.Importer()) 
01444   EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*B.Importer(),Cremotepids));
01445     }
01446   }
01447 
01448 #ifdef ENABLE_MMM_TIMINGS
01449   if(NewFlag) MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Newmatrix Lookups")));
01450   else MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: Jacobi Reuse Lookups")));
01451 #endif
01452 
01453   // ********************************************
01454   // Setup Bcol2Ccol / Bimportcol2Ccol lookups
01455   // ********************************************
01456   // Note: If we ran the map_union, we have this information already
01457 
01458   if(!NewFlag) {
01459     if(colmap_B->SameAs(*colmap_C)){
01460       // Maps are the same: Use local IDs as the hash
01461       for(i=0;i<colmap_B->NumMyElements();i++)
01462   Bcol2Ccol[i]=i;       
01463     }
01464     else {
01465       // Maps are not the same:  Use the map's hash
01466       for(i=0;i<colmap_B->NumMyElements();i++){
01467   Bcol2Ccol[i]=colmap_C->LID((int_type) colmap_B->GID64(i));
01468   if(Bcol2Ccol[i]==-1) EPETRA_CHK_ERR(-11);
01469       }
01470     }
01471     
01472     if(Bview.importMatrix){
01473       Bimportcol2Ccol.resize(Bview.importMatrix->ColMap_.NumMyElements());
01474       for(i=0;i<Bview.importMatrix->ColMap_.NumMyElements();i++){
01475       Bimportcol2Ccol[i]=colmap_C->LID((int_type) Bview.importMatrix->ColMap_.GID64(i));
01476       if(Bimportcol2Ccol[i]==-1) EPETRA_CHK_ERR(-12);
01477       }
01478       
01479     }
01480   }
01481   
01482 
01483   // Call the appropriate core routine
01484   if(NewFlag) {
01485     EPETRA_CHK_ERR(jacobi_A_B_newmatrix<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,Cremotepids,C));
01486   }
01487   else {
01488     // This always has a real map
01489     EPETRA_CHK_ERR(jacobi_A_B_reuse<int_type>(omega,Dinv,A,B,Bview,Bcol2Ccol,Bimportcol2Ccol,C));
01490   }
01491 
01492   // Cleanup      
01493   delete mapunion;
01494   return 0;
01495 }
01496 
01497 
01498 /*****************************************************************************/
01499 /*****************************************************************************/
01500 /*****************************************************************************/
01501 int MatrixMatrix::jacobi_A_B(double omega,
01502            const Epetra_Vector & Dinv,
01503            const Epetra_CrsMatrix & A,
01504            CrsMatrixStruct & Aview,
01505            const Epetra_CrsMatrix & B,
01506            CrsMatrixStruct& Bview,
01507            Epetra_CrsMatrix& C,
01508            bool call_FillComplete_on_result)
01509 {
01510 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01511   if(A.RowMap().GlobalIndicesInt() && B.RowMap().GlobalIndicesInt()) {
01512     return Tjacobi_A_B<int>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
01513   }
01514   else
01515 #endif
01516 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01517     if(A.RowMap().GlobalIndicesLongLong() && B.RowMap().GlobalIndicesLongLong()) {
01518       return Tjacobi_A_B<long long>(omega,Dinv,A,Aview,B,Bview,C,call_FillComplete_on_result);
01519     }
01520     else
01521 #endif
01522     throw "EpetraExt::MatrixMatrix::jacobi_A_B: GlobalIndices type unknown";
01523 }
01524 
01525 
01526 
01527 /*****************************************************************************/
01528 /*****************************************************************************/
01529 /*****************************************************************************/
01530 template<typename int_type>
01531 int MatrixMatrix::Tmult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) {
01532   using Teuchos::RCP;
01533   using Teuchos::rcp;
01534 
01535 #ifdef ENABLE_MMM_TIMINGS
01536   using Teuchos::TimeMonitor;
01537   Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T Transpose")));
01538 #endif
01539 
01540 
01541   /*************************************************************/
01542   /* 2/3) Call mult_A_B_newmatrix w/ fillComplete              */
01543   /*************************************************************/
01544 #ifdef ENABLE_MMM_TIMINGS
01545   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T AB-core")));
01546 #endif
01547   RCP<Epetra_CrsMatrix> Ctemp;
01548 
01549   // If Atrans has no Exporter, we can use C instead of having to create a temp matrix
01550   bool needs_final_export = Atransview.origMatrix->Exporter() != 0;
01551   if(needs_final_export) 
01552     Ctemp = rcp(new Epetra_CrsMatrix(Copy,Atransview.origMatrix->RowMap(),Bview.origMatrix->ColMap(),0));
01553   else {
01554     EPETRA_CHK_ERR( C.ReplaceColMap(Bview.origMatrix->ColMap()) );
01555     Ctemp = rcp(&C,false);// don't allow deallocation
01556   }
01557   
01558   // Multiply
01559   std::vector<int> Bcol2Ccol(Bview.origMatrix->NumMyCols());
01560   for(int i=0; i<Bview.origMatrix->NumMyCols(); i++) 
01561     Bcol2Ccol[i]=i;
01562   std::vector<int> Bimportcol2Ccol,Cremotepids;
01563   if(Bview.origMatrix->Importer()) 
01564     EPETRA_CHK_ERR( Epetra_Util::GetRemotePIDs(*Bview.origMatrix->Importer(),Cremotepids));
01565 
01566   EPETRA_CHK_ERR(mult_A_B_newmatrix<int_type>(*Atransview.origMatrix,*Bview.origMatrix,Bview,
01567                 Bcol2Ccol,Bimportcol2Ccol,Cremotepids,
01568                 *Ctemp));
01569 
01570   /*************************************************************/
01571   /* 4) ExportAndFillComplete matrix (if needed)               */
01572   /*************************************************************/
01573 #ifdef ENABLE_MMM_TIMINGS
01574   MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer("EpetraExt: MMM-T ESFC")));
01575 #endif
01576 
01577   if(needs_final_export) 
01578     C.FusedExport(*Ctemp,*Ctemp->Exporter(),&Bview.origMatrix->DomainMap(),&Atransview.origMatrix->RangeMap(),false);
01579 
01580   return 0;
01581 }
01582 
01583 
01584 
01585 /*****************************************************************************/
01586 /*****************************************************************************/
01587 /*****************************************************************************/
01588 int MatrixMatrix::mult_AT_B_newmatrix(const CrsMatrixStruct & Atransview, const CrsMatrixStruct & Bview, Epetra_CrsMatrix & C) {
01589 
01590 #ifndef EPETRA_NO_32BIT_GLOBAL_INDICES
01591   if(Atransview.origMatrix->RowMap().GlobalIndicesInt() && Bview.origMatrix->RowMap().GlobalIndicesInt()) {
01592     return Tmult_AT_B_newmatrix<int>(Atransview,Bview,C);
01593   }
01594   else
01595 #endif
01596 #ifndef EPETRA_NO_64BIT_GLOBAL_INDICES
01597   if(Atransview.origMatrix->RowMap().GlobalIndicesLongLong() && Bview.origMatrix->RowMap().GlobalIndicesLongLong()) {
01598     return Tmult_AT_B_newmatrix<long long>(Atransview,Bview,C);
01599   }
01600   else
01601 #endif
01602     throw "EpetraExt::MatrixMatrix::mult_AT_B_newmatrix: GlobalIndices type unknown";
01603 }
01604 
01605 
01606 
01607 }//namespace EpetraExt
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines