IFPACK Development
Ifpack_SORa.cpp
00001 #include "Ifpack_SORa.h"
00002 #include "Ifpack.h"
00003 #include "Ifpack_Utils.h"
00004 #include "Teuchos_ParameterList.hpp"
00005 #include "Teuchos_RefCountPtr.hpp"
00006 #include "Epetra_Import.h"
00007 #include "Epetra_Export.h"
00008 #include "Epetra_IntVector.h"
00009 
00010 using Teuchos::RefCountPtr;
00011 using Teuchos::rcp;
00012 
00013 
00014 
00015 #ifdef HAVE_IFPACK_EPETRAEXT
00016 #include "EpetraExt_MatrixMatrix.h"
00017 #include "EpetraExt_RowMatrixOut.h"
00018 #include "EpetraExt_MultiVectorOut.h"
00019 #include "EpetraExt_Transpose_RowMatrix.h"
00020 
00021 
00022 #define ABS(x) ((x)>=0?(x):-(x))
00023 #define MIN(x,y) ((x)<(y)?(x):(y))
00024 #define MAX(x,y) ((x)>(y)?(x):(y))
00025 
00026 // Useful functions horked from ML
00027 int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows);
00028 void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix);
00029 Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix);
00030 
00031 Ifpack_SORa::Ifpack_SORa(Epetra_RowMatrix* A):
00032   IsInitialized_(false),
00033   IsComputed_(false),
00034   Label_(),
00035   Alpha_(1.5),
00036   Gamma_(1.0),
00037   NumSweeps_(1),
00038   IsParallel_(false),
00039   HaveOAZBoundaries_(false),
00040   UseInterprocDamping_(false),
00041   UseGlobalDamping_(false),
00042   LambdaMax_(-1.0),
00043   Time_(A->Comm())
00044 {
00045   Epetra_CrsMatrix *Acrs=dynamic_cast<Epetra_CrsMatrix*>(A);
00046   if(Acrs) Acrs_=rcp(Acrs,false);
00047   A_=rcp(A,false);
00048 }
00049 
00050 void Ifpack_SORa::Destroy(){
00051 }
00052 
00053 
00054 
00055 int Ifpack_SORa::Initialize(){
00056   Alpha_            = List_.get("sora: alpha", Alpha_);
00057   Gamma_            = List_.get("sora: gamma",Gamma_);
00058   NumSweeps_        = List_.get("sora: sweeps",NumSweeps_);
00059   HaveOAZBoundaries_= List_.get("sora: oaz boundaries", HaveOAZBoundaries_);
00060   UseInterprocDamping_ = List_.get("sora: use interproc damping",UseInterprocDamping_);
00061   UseGlobalDamping_ = List_.get("sora: use global damping",UseGlobalDamping_);
00062 
00063   if (A_->Comm().NumProc() != 1) IsParallel_ = true;
00064   else {
00065     IsParallel_ = false;    
00066     // Don't use interproc damping, for obvious reasons
00067     UseInterprocDamping_=false;
00068   }
00069 
00070   // Counters
00071   IsInitialized_=true;
00072   NumInitialize_++;
00073   return 0;
00074 }
00075 
00076 int Ifpack_SORa::SetParameters(Teuchos::ParameterList& parameterlist){
00077   List_=parameterlist;
00078   return 0;
00079 }
00080 
00081 
00082 int Ifpack_SORa::Compute(){
00083   if(!IsInitialized_) Initialize();
00084   Epetra_Map *RowMap=const_cast<Epetra_Map*>(&A_->RowMatrixRowMap());
00085   Epetra_Vector Adiag(*RowMap);
00086   Epetra_CrsMatrix *Askew2=0, *Aherm2=0,*W=0;
00087   int *rowptr_s,*colind_s,*rowptr_h,*colind_h;
00088   double *vals_s,*vals_h;
00089   bool RowMatrixMode=(Acrs_==Teuchos::null);
00090 
00091   // Label
00092   sprintf(Label_, "IFPACK SORa (alpha=%5.2f gamma=%5.2f)",Alpha_,Gamma_); 
00093 
00094   if(RowMatrixMode){
00095     if(!A_->Comm().MyPID()) cout<<"SORa: RowMatrix mode enabled"<<endl;
00096     // RowMatrix mode, build Acrs_ the hard way.
00097     Epetra_RowMatrix *Arow=&*A_;
00098     Epetra_Map *ColMap=const_cast<Epetra_Map*>(&A_->RowMatrixColMap());
00099 
00100     int Nmax=A_->MaxNumEntries();
00101     int length;
00102     vector<int> indices(Nmax);
00103     vector<double> values(Nmax); 
00104     Epetra_CrsMatrix *Acrs=new Epetra_CrsMatrix(Copy,*RowMap,Nmax);
00105 
00106     for(int i=0;i<Arow->NumMyRows();i++){
00107       Arow->ExtractMyRowCopy(i,Nmax,length,&values[0],&indices[0]);
00108       for(int j=0;j<length;j++)
00109     indices[j]=ColMap->GID(indices[j]);
00110       Acrs->InsertGlobalValues(RowMap->GID(i),length,&values[0],&indices[0]);
00111     }
00112     Acrs->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());
00113     Acrs_=rcp(Acrs,true);
00114   }
00115 
00116   // Create Askew2
00117   // Note: Here I let EpetraExt do the thinking for me.  Since it gets all the maps correct for the E + F^T stencil.
00118   // There are probably more efficient ways to do this but this method has the bonus of allowing code reuse.
00119   IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,-1,Askew2));
00120   Askew2->FillComplete();
00121   
00122   // Create Aherm2
00123   IFPACK_CHK_ERR(EpetraExt::MatrixMatrix::Add(*Acrs_,false,1,*Acrs_,true,1,Aherm2));
00124   Aherm2->FillComplete();
00125 
00126   int nnz2=Askew2->NumMyNonzeros();
00127   int N=Askew2->NumMyRows();
00128 
00129 
00130   // Grab pointers
00131   IFPACK_CHK_ERR(Askew2->ExtractCrsDataPointers(rowptr_s,colind_s,vals_s));
00132   IFPACK_CHK_ERR(Aherm2->ExtractCrsDataPointers(rowptr_h,colind_h,vals_h));
00133 
00134   // Sanity checking: Make sure the sparsity patterns are the same
00135 #define SANITY_CHECK
00136 #ifdef SANITY_CHECK
00137   for(int i=0;i<N;i++)
00138     if(rowptr_s[i]!=rowptr_h[i]) IFPACK_CHK_ERR(-2);
00139   for(int i=0;i<nnz2;i++)
00140     if(colind_s[i]!=colind_h[i]) IFPACK_CHK_ERR(-3);
00141 #endif
00142 
00143   // Dirichlet Detection & Nuking of Aherm2 and Askew2
00144   // Note: Relies on Aherm2/Askew2 having identical sparsity patterns (see sanity check above)
00145   if(HaveOAZBoundaries_){ 
00146     int numBCRows;
00147     int* dirRows=FindLocalDiricheltRowsFromOnesAndZeros(*Acrs_,numBCRows);
00148     Epetra_IntVector* dirCols=FindLocalDirichletColumnsFromRows(dirRows,numBCRows,*Aherm2);
00149     Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Aherm2);
00150     Apply_BCsToMatrixRowsAndColumns(dirRows,numBCRows,*dirCols,*Askew2);
00151     delete [] dirRows;
00152     delete dirCols;
00153   }
00154 
00155   // Grab diagonal of A
00156   A_->ExtractDiagonalCopy(Adiag);
00157 
00158   // Allocate the diagonal for W
00159   Epetra_Vector *Wdiag = new Epetra_Vector(*RowMap);
00160 
00161   // Build the W matrix (lower triangle only)
00162   // Note: Relies on EpetraExt giving me identical sparsity patterns for both Askew2 and Aherm2 (see sanity check above)
00163   int maxentries=Askew2->MaxNumEntries();
00164   int* gids=new int [maxentries];
00165   double* newvals=new double[maxentries];
00166   W=new Epetra_CrsMatrix(Copy,*RowMap,0);
00167   for(int i=0;i<N;i++){
00168     // Build the - (1+alpha)/2 E - (1-alpha)/2 F part of the W matrix
00169     int rowgid=Acrs_->GRID(i);
00170     double c_data=0.0;
00171     double ipdamp=0.0;
00172     int idx=0;
00173 
00174     for(int j=rowptr_s[i];j<rowptr_s[i+1];j++){      
00175       int colgid=Askew2->GCID(colind_s[j]);
00176       c_data+=fabs(vals_s[j]);
00177       if(rowgid>colgid){
00178     // Rely on the fact that off-diagonal entries are always numbered last, dropping the entry entirely.
00179     if(colind_s[j] < N) {       
00180       gids[idx]=colgid;
00181       newvals[idx]=vals_h[j]/2 + Alpha_ * vals_s[j]/2;
00182       idx++;
00183     }
00184     else{
00185       ipdamp+=fabs(vals_h[j]/2 + Alpha_ * vals_s[j]/2);
00186     }
00187       }   
00188     }
00189     if(idx>0)
00190       IFPACK_CHK_ERR(W->InsertGlobalValues(rowgid,idx,newvals,gids));
00191 
00192     // Do the diagonal
00193     double w_val= c_data*Alpha_*Gamma_/4 + Adiag[Acrs_->LRID(rowgid)];
00194     if(UseInterprocDamping_) w_val+=ipdamp;
00195 
00196     W->InsertGlobalValues(rowgid,1,&w_val,&rowgid);
00197     IFPACK_CHK_ERR(Wdiag->ReplaceGlobalValues(1,&w_val,&rowgid));
00198   }
00199   W->FillComplete(A_->OperatorDomainMap(),A_->OperatorRangeMap());      
00200   W_=rcp(W);
00201   Wdiag_=rcp(Wdiag);
00202 
00203   // Mark as computed
00204   IsComputed_=true;
00205 
00206   // Global damping, if wanted
00207   if(UseGlobalDamping_) {
00208     PowerMethod(10,LambdaMax_);
00209     if(!A_->Comm().MyPID()) printf("SORa: Global damping parameter = %6.4e (lmax=%6.4e)\n",GetOmega(),LambdaMax_);
00210   }
00211 
00212   // Cleanup
00213   delete [] gids;
00214   delete [] newvals;
00215   delete Aherm2;
00216   delete Askew2;
00217   if(RowMatrixMode) {
00218     Acrs_=Teuchos::null;
00219   }
00220 
00221   // Counters
00222   NumCompute_++;
00223   ComputeTime_ += Time_.ElapsedTime();
00224   return 0;
00225 }
00226 
00227 
00228 
00229 int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{  
00230   if(!IsComputed_) return -1;
00231   Time_.ResetStartTime();
00232   bool initial_guess_is_zero=false;  
00233   int NumMyRows=W_->NumMyRows();
00234   int NumVectors = X.NumVectors();
00235   Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors);
00236 
00237   double omega=GetOmega();
00238 
00239   // need to create an auxiliary vector, Xcopy
00240   Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
00241   if (X.Pointers()[0] == Y.Pointers()[0]){
00242     Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
00243     // Since the user didn't give us anything better, our initial guess is zero.
00244     Y.Scale(0.0);
00245     initial_guess_is_zero=true;
00246   }
00247   else
00248     Xcopy = Teuchos::rcp( &X, false ); 
00249 
00250   Teuchos::RefCountPtr< Epetra_MultiVector > T2;
00251   // Note: Assuming that the matrix has an importer.  Ifpack_PointRelaxation doesn't do this, but given that 
00252   // I have a CrsMatrix, I'm probably OK.
00253   // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine 
00254   // if things are on or off processor.
00255   // Note: T2 must be zero'd out
00256   if (IsParallel_ && W_->Importer())  T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true));
00257   else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true));
00258 
00259   // Pointer grabs
00260   int* rowptr,*colind;
00261   double *values;
00262   double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr;
00263   T2->ExtractView(&t2_ptr);
00264   Y.ExtractView(&y_ptr);
00265   Temp.ExtractView(&t_ptr);
00266   Xcopy->ExtractView(&x_ptr);
00267   Wdiag_->ExtractView(&d_ptr);
00268   IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values));
00269 
00270 
00271   for(int i=0; i<NumSweeps_; i++){
00272     // Calculate b-Ax 
00273     if(!initial_guess_is_zero  || i > 0) {      
00274       A_->Apply(Y,Temp);
00275       Temp.Update(1.0,*Xcopy,-1.0);
00276     }
00277     else 
00278       Temp.Update(1.0,*Xcopy,0.0);
00279 
00280     // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated 
00281     // in this sweep before they are used, so we don't need to reset T2 to zero here.
00282 
00283     // Do backsolve & update
00284     // x = x  + W^{-1} (b - A x)
00285     for(int j=0; j<NumMyRows; j++){
00286       double diag=d_ptr[j];
00287       for (int m=0 ; m<NumVectors; m++) {
00288     double dtmp=0.0;
00289     // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute.
00290     t2_ptr[m][j]=0.0;
00291     for(int k=rowptr[j];k<rowptr[j+1];k++){
00292       dtmp+= values[k]*t2_ptr[m][colind[k]];
00293     }
00294     // Yes, we need to update both of these.
00295     t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag;     
00296     y_ptr[m][j] += omega*t2_ptr[m][j];
00297       }
00298     }
00299   }
00300 
00301   // Counter update
00302   NumApplyInverse_++;
00303   ApplyInverseTime_ += Time_.ElapsedTime();
00304   return 0;
00305 }
00306 
00307 
00308 ostream& Ifpack_SORa::Print(ostream& os) const{
00309   os<<"Ifpack_SORa"<<endl;
00310   os<<" alpha = "<<Alpha_<<endl;
00311   os<<" gamma = "<<Gamma_<<endl;
00312   os<<endl;
00313   return os;
00314 }
00315 
00316 
00317 double Ifpack_SORa::Condest(const Ifpack_CondestType CT, 
00318                              const int MaxIters,
00319                              const double Tol,
00320                              Epetra_RowMatrix* Matrix_in){
00321   return -1.0;
00322 }
00323 
00324 
00325 
00326 
00327 
00328 // ============================================================================
00329 inline int* FindLocalDiricheltRowsFromOnesAndZeros(const Epetra_CrsMatrix & Matrix, int &numBCRows){
00330   int *dirichletRows = new int[Matrix.NumMyRows()];
00331   numBCRows = 0;
00332   for (int i=0; i<Matrix.NumMyRows(); i++) {
00333     int numEntries, *cols;
00334     double *vals;
00335     int ierr = Matrix.ExtractMyRowView(i,numEntries,vals,cols);
00336     if (ierr == 0) {
00337       int nz=0;
00338       for (int j=0; j<numEntries; j++) if (vals[j]!=0) nz++;      
00339       if (nz==1) dirichletRows[numBCRows++] = i;           
00340       // EXPERIMENTAL: Treat Special Inflow Boundaries as Dirichlet Boundaries
00341       if(nz==2) dirichletRows[numBCRows++] = i;        
00342     }/*end if*/
00343   }/*end for*/
00344   return dirichletRows;
00345 }/*end FindLocalDiricheltRowsFromOnesAndZeros*/
00346 
00347 
00348 // ====================================================================== 
00350 inline Epetra_IntVector * FindLocalDirichletColumnsFromRows(const int *dirichletRows, int numBCRows,const Epetra_CrsMatrix & Matrix){
00351   // Zero the columns corresponding to the Dirichlet rows.  Completely ignore the matrix maps.
00352   
00353   // Build rows
00354   Epetra_IntVector ZeroRows(Matrix.RowMap());
00355   Epetra_IntVector *ZeroCols=new Epetra_IntVector(Matrix.ColMap());
00356   ZeroRows.PutValue(0);  
00357   ZeroCols->PutValue(0);  
00358 
00359   // Easy Case: We're all on one processor
00360   if(Matrix.RowMap().SameAs(Matrix.ColMap())){
00361     for (int i=0; i < numBCRows; i++)
00362       (*ZeroCols)[dirichletRows[i]]=1;
00363     return ZeroCols;
00364   }
00365 
00366   // Flag the rows which are zero locally
00367   for (int i=0; i < numBCRows; i++)
00368     ZeroRows[dirichletRows[i]]=1;
00369 
00370   // Boundary exchange to move the data  
00371   if(Matrix.RowMap().SameAs(Matrix.DomainMap())){
00372     // Use A's Importer if we have one
00373     ZeroCols->Import(ZeroRows,*Matrix.Importer(),Insert);     
00374   }
00375   else{
00376     // Use custom importer if we don't
00377     Epetra_Import Importer(Matrix.ColMap(),Matrix.RowMap());
00378     ZeroCols->Import(ZeroRows,Importer,Insert);      
00379   }
00380   return ZeroCols;
00381 }
00382 
00383 
00384 // ====================================================================== 
00385 inline void Apply_BCsToMatrixRowsAndColumns(const int *dirichletRows, int numBCRows,const Epetra_IntVector &dirichletColumns,const Epetra_CrsMatrix & Matrix){
00386   /* This function zeros out rows & columns of Matrix.
00387      Comments: The graph of Matrix is unchanged.
00388   */
00389   // Nuke the rows
00390   for(int i=0;i<numBCRows;i++){
00391     int numEntries, *cols;
00392     double *vals;
00393     Matrix.ExtractMyRowView(dirichletRows[i],numEntries,vals,cols);
00394     for (int j=0; j<numEntries; j++) vals[j]=0.0; 
00395   }/*end for*/
00396 
00397   // Nuke the columns
00398   for (int i=0; i < Matrix.NumMyRows(); i++) {
00399     int numEntries;
00400     double *vals;
00401     int *cols;
00402     Matrix.ExtractMyRowView(i,numEntries,vals,cols);
00403     for (int j=0; j < numEntries; j++) {
00404       if (dirichletColumns[ cols[j] ] > 0)  vals[j] = 0.0;
00405     }/*end for*/
00406   }/*end for*/
00407 }/* end Apply_BCsToMatrixColumns */
00408 
00409 
00410 
00411 
00412 
00413 int Ifpack_SORa::
00414 PowerMethod(const int MaximumIterations,  double& lambda_max)
00415 {
00416   // this is a simple power method
00417   lambda_max = 0.0;
00418   double RQ_top, RQ_bottom, norm;
00419   Epetra_Vector x(A_->OperatorDomainMap());
00420   Epetra_Vector y(A_->OperatorRangeMap());
00421   Epetra_Vector z(A_->OperatorRangeMap());
00422   x.Random();
00423   x.Norm2(&norm);
00424   if (norm == 0.0) IFPACK_CHK_ERR(-1);
00425 
00426   x.Scale(1.0 / norm);
00427 
00428   // Only do 1 sweep per PM step, turn off global damping
00429   int NumSweepsBackup=NumSweeps_;
00430   bool UseGlobalDampingBackup=UseGlobalDamping_;
00431   NumSweeps_=1;UseGlobalDamping_=false;
00432 
00433   for (int iter = 0; iter < MaximumIterations; ++iter)
00434   {
00435     y.PutScalar(0.0);
00436     A_->Apply(x, z);
00437     ApplyInverse(z,y);
00438     y.Update(1.0,x,-1.0);
00439 
00440     // Compute the Rayleigh Quotient
00441     y.Dot(x, &RQ_top);
00442     x.Dot(x, &RQ_bottom);
00443     lambda_max = RQ_top / RQ_bottom;   
00444     y.Norm2(&norm);
00445     if (norm == 0.0) IFPACK_CHK_ERR(-1);
00446     x.Update(1.0 / norm, y, 0.0);
00447 
00448   }
00449 
00450   // Enable if we want to prevent over-relaxation
00451   //  lambda_max=MAX(lambda_max,1.0);
00452 
00453   // Reset parameters
00454   NumSweeps_=NumSweepsBackup;
00455   UseGlobalDamping_=UseGlobalDampingBackup;
00456 
00457   return(0);
00458 }
00459 
00460 #endif
 All Classes Files Functions Variables Enumerations Friends