Epetra_JadOperator.cpp

Go to the documentation of this file.
00001 
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //               Epetra: Linear Algebra Services Package 
00006 //                 Copyright (2001) Sandia Corporation
00007 // 
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 // 
00011 // This library is free software; you can redistribute it and/or modify
00012 // it under the terms of the GNU Lesser General Public License as
00013 // published by the Free Software Foundation; either version 2.1 of the
00014 // License, or (at your option) any later version.
00015 //  
00016 // This library is distributed in the hope that it will be useful, but
00017 // WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00019 // Lesser General Public License for more details.
00020 //  
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License along with this library; if not, write to the Free Software
00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00024 // USA
00025 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00026 // 
00027 // ************************************************************************
00028 //@HEADER
00029 
00030 #include "Epetra_JadOperator.h"
00031 #include "Epetra_Map.h"
00032 #include "Epetra_Import.h"
00033 #include "Epetra_Export.h"
00034 #include "Epetra_Vector.h"
00035 #include "Epetra_MultiVector.h"
00036 #include "Epetra_Comm.h"
00037 #include "Epetra_Util.h"
00038 #include "Epetra_IntSerialDenseVector.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_CrsMatrix.h"
00041 
00042 // At this point, we are not support reduced storage capabilities
00043 #undef REDUCED_STORAGE_SUPPORT
00044 
00045 // When we do, we will need to add a check for climits vs limits.h because some machine, esp. IRIX 
00046 // platforms do not have climits.
00047 #ifdef REDUCED_STORAGE_SUPPORT
00048 #include <climits>
00049 #endif
00050 
00051 //==============================================================================
00052 Epetra_JadOperator::Epetra_JadOperator(const Epetra_RowMatrix & Matrix, bool UseFloats, bool UseShorts) 
00053   : NormInf_(-1.0),
00054     Comm_(Matrix.RowMatrixRowMap().Comm().Clone()),
00055     OperatorDomainMap_(Matrix.OperatorDomainMap()),
00056     OperatorRangeMap_(Matrix.OperatorRangeMap()),
00057     NumMyRows_(Matrix.NumMyRows()),
00058     NumMyCols_(Matrix.NumMyCols()),
00059     NumMyNonzeros_(Matrix.NumMyNonzeros()),
00060     NumGlobalNonzeros_(Matrix.NumGlobalNonzeros()),
00061     Values_(0),
00062     FloatValues_(0),
00063     Indices_(0),
00064     ShortIndices_(0),
00065     IndexOffset_(0),
00066     RowPerm_(0),
00067     UseTranspose_(Matrix.UseTranspose()),
00068     HasNormInf_(Matrix.HasNormInf()),
00069     UsingFloats_(UseFloats),
00070     UsingShorts_(UseShorts),
00071     NumJaggedDiagonals_(Matrix.MaxNumEntries()),
00072     ImportVector_(0),
00073     ExportVector_(0),
00074     Importer_(0),
00075     Exporter_(0)
00076 {
00077   if (!Matrix.Filled()) throw ReportError("Input matrix must have called FillComplete()", -1);
00078   Allocate(Matrix, UseFloats);
00079   SetLabel("Epetra::JadOperator");
00080 }
00081 
00082 //==============================================================================
00083 Epetra_JadOperator::~Epetra_JadOperator(){
00084 
00085   if (FloatValues_!=0) delete [] FloatValues_;
00086   if (ShortIndices_!=0) delete [] ShortIndices_;
00087 
00088   if (ImportVector_!=0) delete ImportVector_;
00089   ImportVector_=0;
00090   if (ExportVector_!=0) delete ExportVector_;
00091   ExportVector_=0;
00092   if (Importer_!=0) delete Importer_;
00093   Importer_=0;
00094   if (Exporter_!=0) delete Exporter_;
00095   Exporter_=0;
00096   delete Comm_;
00097 }
00098 
00099 //==============================================================================
00100 int Epetra_JadOperator::UpdateValues(const Epetra_RowMatrix & Matrix, bool CheckStructure) {
00101 
00102   int NumEntries;
00103   int * Indices = 0;
00104   double * Values =0;
00105 
00106   int ierr = 0;
00107 
00108   try { // If matrix is an Epetra_CrsMatrix, we can get date much more cheaply
00109 
00110     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00111 
00112     for (int i=0; i<NumMyRows_; i++) {
00113 
00114       EPETRA_CHK_ERR(A.ExtractMyRowView(RowPerm_[i], NumEntries, Values, Indices)); // Get the current row based on the permutation
00115       if (!UsingFloats_) 
00116   for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[i];
00117       else
00118   for (int j=0; j< NumEntries; j++) FloatValues_[IndexOffset_[j]+i] = (float) Values[i];
00119       if (CheckStructure) {
00120   if (!UsingShorts_) 
00121     for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[i]) ierr = - 1;
00122   else
00123     for (int j=0; j< NumEntries; j++) if (ShortIndices_[IndexOffset_[j]+i] != (unsigned short) Indices[i]) ierr = - 1;
00124       }
00125     }
00126   }
00127   catch (...) { // Otherwise just live with RowMatrix interface
00128 
00129   Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00130   Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00131   Indices = curIndices.Values();
00132   Values = curValues.Values();
00133     for (int i=0; i<NumMyRows_; i++) {
00134       EPETRA_CHK_ERR(Matrix.ExtractMyRowCopy(RowPerm_[i], NumJaggedDiagonals_, NumEntries, Values, Indices)); // Get current row based on the permutation
00135       if (!UsingFloats_) 
00136   for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[i];
00137       else
00138   for (int j=0; j< NumEntries; j++) FloatValues_[IndexOffset_[j]+i] = (float) Values[i];
00139       if (CheckStructure) {
00140   if (!UsingShorts_) 
00141     for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[i]) ierr = - 1;
00142   else
00143     for (int j=0; j< NumEntries; j++) if (ShortIndices_[IndexOffset_[j]+i] != (unsigned short) Indices[i]) ierr = - 1;
00144       }
00145     }
00146   }
00147   EPETRA_CHK_ERR(ierr);
00148   return(ierr);
00149 }
00150 
00151 //==============================================================================
00152 int Epetra_JadOperator::Allocate(const Epetra_RowMatrix & Matrix, bool UseFloats) {
00153 
00154   // Check if non-trivial import/export operators
00155   if (!(Matrix.RowMatrixRowMap().SameAs(Matrix.OperatorRangeMap()))) 
00156     Exporter_ = new Epetra_Export(Matrix.RowMatrixRowMap(), Matrix.OperatorRangeMap());
00157   
00158   if (Matrix.RowMatrixImporter()!=0) 
00159     Importer_ = new Epetra_Import(Matrix.RowMatrixColMap(), Matrix.OperatorDomainMap());
00160 
00161   // Allocate IndexOffset storage
00162 
00163   IndexOffset_.Resize(NumJaggedDiagonals_+1);
00164 
00165   // Next compute permutation of rows
00166   RowPerm_.Resize(NumMyRows_);
00167   Epetra_IntSerialDenseVector profile(NumMyRows_);
00168   for (int i=0; i<NumMyRows_; i++) {
00169     int NumEntries;
00170     Matrix.NumMyRowEntries(i, NumEntries);
00171     profile[i] = NumEntries;
00172     RowPerm_[i] = i;
00173   }
00174 
00175   Epetra_Util sorter;
00176   int * RowPerm = RowPerm_.Values();
00177   sorter.Sort(false, NumMyRows_, profile.Values(), 0, 0, 1, &RowPerm);
00178 
00179   // Now build IndexOffsets:  These contain the lengths of the jagged diagonals
00180 
00181   for (int i=0; i<NumJaggedDiagonals_; i++) IndexOffset_[i] = 0;
00182 
00183   int curOffset = NumMyRows_;
00184   int * curIndex = IndexOffset_.Values(); // point to first index (will be incremented immediately below)
00185   for (int i=1; i<NumJaggedDiagonals_+1; i++) {
00186     curIndex++;
00187     while (*curIndex==0) {
00188       if (profile[curOffset-1]<i) curOffset--;
00189       else *curIndex = *(curIndex-1) + curOffset; // Set the length of the current jagged diagonal (plus scan sum)
00190     }
00191   }
00192 
00193   // Next determine how to handle values.  Two possibilities:
00194   // 1) UseFloats is false, so we copy values into a contiguous double array.
00195   // 3) UseFloats is true so we create a single precision copy of matrix values.
00196 
00197   if (!UsingFloats_)  // Allocate storage in Values_
00198     Values_.Resize(NumMyNonzeros_);
00199   else // UseFloats == true
00200     FloatValues_ = new float[NumMyNonzeros_];
00201 
00202   // Next determine how to handle integers.  Two possibilities:
00203   // 1) Local column range is within the range of unsigned short ints, so we copy the indices to an array of unsigned shorts.
00204   // 2) Local column range is outside range of unsigned shorts and we copy to array of ints.
00205   // In both cases we embed the nonzero count per row into the index array.
00206 
00207 #ifdef REDUCED_STORAGE_SUPPORT
00208   if (Matrix.NumMyCols()<=USHRT_MAX && UsingShorts_) UsingShorts_ = true;
00209 #endif
00210   if (!UsingShorts_)
00211     Indices_.Resize(NumMyNonzeros_);
00212   else // Matrix.NumMyCols()<=USHRT_MAX
00213     ShortIndices_ = new unsigned short[NumMyNonzeros_];
00214 
00215   int NumEntries;
00216   int * Indices = 0;
00217   double * Values =0;
00218 
00219   try { // If matrix is an Epetra_CrsMatrix, we can get date much more cheaply
00220 
00221     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00222 
00223     for (int i=0; i<NumMyRows_; i++) {
00224 
00225       EPETRA_CHK_ERR(A.ExtractMyRowView(RowPerm_[i], NumEntries, Values, Indices)); // Get the current row based on the permutation
00226       if (!UsingFloats_) 
00227   for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00228       else
00229   for (int j=0; j< NumEntries; j++) FloatValues_[IndexOffset_[j]+i] = (float) Values[j];
00230       if (!UsingShorts_) 
00231   for (int j=0; j< NumEntries; j++) Indices_[IndexOffset_[j]+i] = Indices[j];
00232       else
00233   for (int j=0; j< NumEntries; j++) ShortIndices_[IndexOffset_[j]+i] = (unsigned short) Indices[j];
00234     }
00235   }
00236   catch (...) { // Otherwise just live with RowMatrix interface
00237 
00238   Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00239   Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00240   Indices = curIndices.Values();
00241   Values = curValues.Values();
00242     for (int i=0; i<NumMyRows_; i++) {
00243       EPETRA_CHK_ERR(Matrix.ExtractMyRowCopy(RowPerm_[i], NumJaggedDiagonals_, NumEntries, Values, Indices)); // Get  current row based on the permutation
00244       if (!UsingFloats_) 
00245   for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00246       else
00247   for (int j=0; j< NumEntries; j++) FloatValues_[IndexOffset_[j]+i] = (float) Values[j];
00248       if (!UsingShorts_) 
00249   for (int j=0; j< NumEntries; j++) Indices_[IndexOffset_[j]+i] = Indices[j];
00250       else
00251   for (int j=0; j< NumEntries; j++) ShortIndices_[IndexOffset_[j]+i] = (unsigned short) Indices[j];
00252     }
00253   }
00254   return(0);
00255 }
00256 //=============================================================================
00257 int Epetra_JadOperator::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00258   //
00259   // This function forms the product Y = A * Y or Y = A' * X
00260   //
00261 
00262   int NumVectors = X.NumVectors();
00263   if (NumVectors!=Y.NumVectors()) {
00264     EPETRA_CHK_ERR(-1); // Need same number of vectors in each MV
00265   }
00266 
00267   double** Xp = (double**) X.Pointers();
00268   double** Yp = (double**) Y.Pointers();
00269   int LDX = X.ConstantStride() ? X.Stride() : 0;
00270   int LDY = Y.ConstantStride() ? Y.Stride() : 0;
00271   UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
00272   UpdateExportVector(NumVectors);
00273 
00274   bool TransA = UseTranspose_;
00275   if (!TransA) {
00276 
00277     // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00278     if (Importer()!=0) {
00279       EPETRA_CHK_ERR(ImportVector_->Import(X, *Importer(), Insert));
00280       Xp = (double**)ImportVector_->Pointers();
00281       LDX = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00282     }
00283 
00284     // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
00285     if (Exporter()!=0) {
00286       Yp = (double**)ExportVector_->Pointers();
00287       LDY = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00288     }
00289 
00290     // Do actual computation
00291     if (NumVectors==1)
00292       GeneralMV(TransA, *Xp, *Yp);
00293     else
00294       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00295     if (Exporter()!=0) {
00296       Y.PutScalar(0.0);  // Make sure target is zero
00297       Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
00298     }
00299     // Handle case of rangemap being a local replicated map
00300     if (!OperatorRangeMap().DistributedGlobal() && Comm().NumProc()>1) EPETRA_CHK_ERR(Y.Reduce());
00301   }
00302   else { // Transpose operation
00303     
00304 
00305     // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
00306 
00307     if (Exporter()!=0) {
00308       EPETRA_CHK_ERR(ExportVector_->Import(X, *Exporter(), Insert));
00309       Xp = (double**)ExportVector_->Pointers();
00310       LDX = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00311     }
00312 
00313     // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
00314     if (Importer()!=0) {
00315       Yp = (double**)ImportVector_->Pointers();
00316       LDY = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00317     }
00318 
00319     // Do actual computation
00320     if (NumVectors==1)
00321       GeneralMV(TransA, *Xp, *Yp);
00322     else
00323       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00324     if (Importer()!=0) {
00325       Y.PutScalar(0.0);  // Make sure target is zero
00326       EPETRA_CHK_ERR(Y.Export(*ImportVector_, *Importer(), Add)); // Fill Y with Values from export vector
00327     }
00328     // Handle case of rangemap being a local replicated map
00329     if (!OperatorDomainMap().DistributedGlobal() && Comm().NumProc()>1)  EPETRA_CHK_ERR(Y.Reduce());
00330   }
00331 
00332   UpdateFlops(2*NumVectors*NumGlobalNonzeros());
00333   return(0);
00334 }
00335 //=======================================================================================================
00336 void Epetra_JadOperator::UpdateImportVector(int NumVectors) const {
00337   if(Importer() != 0) {
00338     if(ImportVector_ != 0) {
00339       if(ImportVector_->NumVectors() != NumVectors) {
00340      delete ImportVector_;
00341      ImportVector_= 0;
00342       }
00343     }
00344     if(ImportVector_ == 0)
00345       ImportVector_ = new Epetra_MultiVector(Importer_->TargetMap(),NumVectors); // Create import vector if needed
00346   }
00347   return;
00348 }
00349 //=======================================================================================================
00350 void Epetra_JadOperator::UpdateExportVector(int NumVectors) const {
00351   if(Exporter() != 0) {
00352     if(ExportVector_ != 0) {
00353       if(ExportVector_->NumVectors() != NumVectors) {
00354      delete ExportVector_;
00355      ExportVector_= 0;
00356       }
00357     }
00358     if(ExportVector_ == 0)
00359       ExportVector_ = new Epetra_MultiVector(Exporter_->SourceMap(),NumVectors); // Create Export vector if needed
00360   }
00361   return;
00362 }
00363 //=======================================================================================================
00364 void Epetra_JadOperator::GeneralMM(bool TransA, double ** X, int LDX, double ** Y, int LDY, int NumVectors) const {
00365 
00366   if (LDX==0 || LDY==0 || NumVectors==1) {// Can't unroll RHS if X or Y not strided
00367     for (int k=0; k<NumVectors; k++) GeneralMV(TransA, X[k], Y[k]);
00368   }
00369   else if (NumVectors==2) // Special 2 RHS case (does unrolling in both NumVectors and NumJaggedDiagonals)
00370     GeneralMM2RHS(TransA, X[0], LDX, Y[0], LDY);
00371   // Otherwise unroll RHS only
00372   else
00373     GeneralMM3RHS(TransA, X, LDX, Y, LDY, NumVectors);
00374 
00375   return;
00376 }
00377 //=======================================================================================================
00378 void Epetra_JadOperator::GeneralMM3RHS(bool TransA, double ** X, int ldx, double ** Y, int ldy, int NumVectors) const {
00379 
00380   // Routine for 3 or more RHS
00381 
00382   const double * Values = Values_.Values();
00383   const int * Indices = Indices_.Values();
00384   const int * IndexOffset = IndexOffset_.Values();
00385   const int * RowPerm = RowPerm_.Values();
00386   for (int j=0; j<NumVectors; j++) {
00387     double * y = Y[j];
00388     if (!TransA)
00389       for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00390     else
00391       for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00392   }
00393 
00394   int nv = NumVectors%5; if (nv==0) nv=5;
00395     double * x = X[0];
00396     double * y = Y[0];
00397  
00398 
00399   for (int k=0; k<NumVectors; k+=5) {
00400     
00401     for (int j=0; j<NumJaggedDiagonals_; j++) {
00402       const int * curIndices = Indices+IndexOffset[j];
00403       const double * curValues = Values+IndexOffset[j];
00404       int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00405       switch (nv){
00406       case 1:
00407   {
00408     if (!TransA) {
00409       for (int i=0; i<jaggedDiagonalLength; i++) {
00410         int ix = curIndices[i];
00411         int iy = RowPerm[i];
00412         double val = curValues[i];
00413         y[iy] += val*x[ix];
00414       }
00415     }
00416     else {
00417       for (int i=0; i<jaggedDiagonalLength; i++) {
00418         int iy = curIndices[i];
00419         int ix = RowPerm[i];
00420         double val = curValues[i];
00421         y[iy] += val*x[ix];
00422       }
00423     }
00424     break;
00425   }
00426       case 2:
00427   {
00428     if (!TransA) {
00429       for (int i=0; i<jaggedDiagonalLength; i++) {
00430         int ix = curIndices[i];
00431         int iy = RowPerm[i];
00432         double val = curValues[i];
00433         y[iy] += val*x[ix];
00434         iy+=ldy; ix+=ldx;
00435         y[iy] += val*x[ix];
00436       }
00437     }
00438     else {
00439       for (int i=0; i<jaggedDiagonalLength; i++) {
00440         int iy = curIndices[i];
00441         int ix = RowPerm[i];
00442         double val = curValues[i];
00443         y[iy] += val*x[ix];
00444         iy+=ldy; ix+=ldx;
00445         y[iy] += val*x[ix];
00446       }
00447     }
00448     break;
00449   }
00450       case 3:
00451   {
00452     if (!TransA) {
00453       for (int i=0; i<jaggedDiagonalLength; i++) {
00454         int ix = curIndices[i];
00455         int iy = RowPerm[i];
00456         double val = curValues[i];
00457         y[iy] += val*x[ix];
00458         iy+=ldy; ix+=ldx;
00459         y[iy] += val*x[ix];
00460         iy+=ldy; ix+=ldx;
00461         y[iy] += val*x[ix];
00462       }
00463     }
00464     else {
00465       for (int i=0; i<jaggedDiagonalLength; i++) {
00466         int iy = curIndices[i];
00467         int ix = RowPerm[i];
00468         double val = curValues[i];
00469         y[iy] += val*x[ix];
00470         iy+=ldy; ix+=ldx;
00471         y[iy] += val*x[ix];
00472         iy+=ldy; ix+=ldx;
00473         y[iy] += val*x[ix];
00474       }
00475     }
00476     break;
00477   }
00478       case 4:
00479   {
00480     if (!TransA) {
00481       for (int i=0; i<jaggedDiagonalLength; i++) {
00482         int ix = curIndices[i];
00483         int iy = RowPerm[i];
00484         double val = curValues[i];
00485         y[iy] += val*x[ix];
00486         iy+=ldy; ix+=ldx;
00487         y[iy] += val*x[ix];
00488         iy+=ldy; ix+=ldx;
00489         y[iy] += val*x[ix];
00490         iy+=ldy; ix+=ldx;
00491         y[iy] += val*x[ix];
00492       }
00493     }
00494     else {
00495       for (int i=0; i<jaggedDiagonalLength; i++) {
00496         int iy = curIndices[i];
00497         int ix = RowPerm[i];
00498         double val = curValues[i];
00499         y[iy] += val*x[ix];
00500         iy+=ldy; ix+=ldx;
00501         y[iy] += val*x[ix];
00502         iy+=ldy; ix+=ldx;
00503         y[iy] += val*x[ix];
00504         iy+=ldy; ix+=ldx;
00505         y[iy] += val*x[ix];
00506       }
00507     }
00508     break;
00509   }
00510       case 5:
00511   {
00512     if (!TransA) {
00513       for (int i=0; i<jaggedDiagonalLength; i++) {
00514         int ix = curIndices[i];
00515         int iy = RowPerm[i];
00516         double val = curValues[i];
00517         y[iy] += val*x[ix];
00518         iy+=ldy; ix+=ldx;
00519         y[iy] += val*x[ix];
00520         iy+=ldy; ix+=ldx;
00521         y[iy] += val*x[ix];
00522         iy+=ldy; ix+=ldx;
00523         y[iy] += val*x[ix];
00524         iy+=ldy; ix+=ldx;
00525         y[iy] += val*x[ix];
00526       }
00527     }
00528     else {
00529       for (int i=0; i<jaggedDiagonalLength; i++) {
00530         int iy = curIndices[i];
00531         int ix = RowPerm[i];
00532         double val = curValues[i];
00533         y[iy] += val*x[ix];
00534         iy+=ldy; ix+=ldx;
00535         y[iy] += val*x[ix];
00536         iy+=ldy; ix+=ldx;
00537         y[iy] += val*x[ix];
00538         iy+=ldy; ix+=ldx;
00539         y[iy] += val*x[ix];
00540         iy+=ldy; ix+=ldx;
00541         y[iy] += val*x[ix];
00542       }
00543     }
00544     break;
00545   }
00546       }
00547     }
00548     x += nv*ldx;
00549     y += nv*ldy;
00550     nv = 5; // After initial remainder, we will always do 5 RHS
00551   }
00552   return;
00553 }
00554 //=======================================================================================================
00555 void Epetra_JadOperator::GeneralMM2RHS(bool TransA, double * x, int ldx, double * y, int ldy) const {
00556 
00557   // special 2 rhs case
00558 
00559   const double * Values = Values_.Values();
00560   const int * Indices = Indices_.Values();
00561   const int * IndexOffset = IndexOffset_.Values();
00562   const int * RowPerm = RowPerm_.Values();
00563   if (!TransA) 
00564     for (int i=0; i<NumMyRows_; i++) {
00565       y[i] = 0.0;
00566       y[i+ldy] = 0.0;
00567     }
00568   else
00569     for (int i=0; i<NumMyCols_; i++) {
00570       y[i] = 0.0;
00571       y[i+ldy] = 0.0;
00572     }
00573 
00574   int j = 0;
00575   while (j<NumJaggedDiagonals_) {
00576   int j0 = j;
00577   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00578     j++;
00579     // check if other diagonals have same length up to a max of 2
00580     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<2)) j++;
00581     
00582     int numDiags = j-j0;
00583     assert(numDiags<3 && numDiags>0);
00584     assert(j<NumJaggedDiagonals_+1);
00585     
00586     switch (numDiags){
00587     case 1:
00588       {
00589   const int * curIndices = Indices+IndexOffset[j0];
00590   const double * curValues = Values+IndexOffset[j0];
00591   if (!TransA) {
00592     for (int i=0; i<jaggedDiagonalLength; i++) {
00593       int ix = curIndices[i];
00594       int iy = RowPerm[i];
00595       y[iy] += curValues[i]*x[ix];
00596       iy+=ldy; ix+=ldx;
00597       y[iy] += curValues[i]*x[ix];
00598     }
00599   }
00600   else {
00601     for (int i=0; i<jaggedDiagonalLength; i++){
00602       int iy = curIndices[i];
00603       int ix = RowPerm[i];
00604       y[iy] += curValues[i]*x[ix];
00605       iy+=ldy; ix+=ldx;
00606       y[iy] += curValues[i]*x[ix];
00607     }
00608   }
00609   break;
00610       }
00611     case 2:
00612       {
00613   const int * curIndices0 = Indices+IndexOffset[j0];
00614   const double * curValues0 = Values+IndexOffset[j0++];
00615   const int * curIndices1 = Indices+IndexOffset[j0];
00616   const double * curValues1 = Values+IndexOffset[j0];
00617   if (!TransA) {
00618     for (int i=0; i<jaggedDiagonalLength; i++) {
00619       int ix0 = curIndices0[i];
00620       int ix1 = curIndices1[i];
00621       int iy = RowPerm[i];
00622       y[iy] += 
00623         curValues0[i]*x[ix0] +
00624         curValues1[i]*x[ix1];
00625       iy+=ldy; ix0+=ldx; ix1+=ldx;
00626       y[iy] += 
00627         curValues0[i]*x[ix0] +
00628         curValues1[i]*x[ix1];
00629     }
00630   }
00631   else {
00632     for (int i=0; i<jaggedDiagonalLength; i++) {
00633       int iy0 = curIndices0[i];
00634       int iy1 = curIndices1[i];
00635       int ix = RowPerm[i];
00636       double xval = x[ix];
00637       y[iy0] += curValues0[i]*xval;
00638       y[iy1] += curValues1[i]*xval;
00639       ix+=ldx; iy0+=ldy; iy1+=ldy;
00640       xval = x[ix];
00641       y[iy0] += curValues0[i]*xval;
00642       y[iy1] += curValues1[i]*xval;
00643     }
00644   }
00645       }
00646       break;
00647     }
00648   }
00649   return;
00650 }
00651 //=======================================================================================================
00652 void Epetra_JadOperator::GeneralMV(bool TransA, double * x, double * y)  const {
00653   
00654   const double * Values = Values_.Values();
00655   const int * Indices = Indices_.Values();
00656   const int * IndexOffset = IndexOffset_.Values();
00657   const int * RowPerm = RowPerm_.Values();
00658   if (!TransA)
00659     for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00660   else
00661     for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00662 
00663   int j = 0;
00664   while (j<NumJaggedDiagonals_) {
00665   int j0 = j;
00666   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00667     j++;
00668     // check if other diagonals have same length up to a max of 5
00669     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<5)) j++;
00670     
00671     int numDiags = j-j0;
00672     assert(numDiags<6 && numDiags>0);
00673     assert(j<NumJaggedDiagonals_+1);
00674     
00675     switch (numDiags){
00676     case 1:
00677       {
00678   const int * curIndices = Indices+IndexOffset[j0];
00679   const double * curValues = Values+IndexOffset[j0];
00680   if (!TransA) {
00681     for (int i=0; i<jaggedDiagonalLength; i++)
00682       y[RowPerm[i]] += curValues[i]*x[curIndices[i]];
00683   }
00684   else {
00685     for (int i=0; i<jaggedDiagonalLength; i++)
00686       y[curIndices[i]] += curValues[i]*x[RowPerm[i]];
00687   }
00688   break;
00689       }
00690     case 2:
00691       {
00692   const int * curIndices0 = Indices+IndexOffset[j0];
00693   const double * curValues0 = Values+IndexOffset[j0++];
00694   const int * curIndices1 = Indices+IndexOffset[j0];
00695   const double * curValues1 = Values+IndexOffset[j0];
00696   if (!TransA) {
00697     for (int i=0; i<jaggedDiagonalLength; i++) {
00698       y[RowPerm[i]] += 
00699         curValues0[i]*x[curIndices0[i]] +
00700         curValues1[i]*x[curIndices1[i]];
00701     }
00702   }
00703   else {
00704     for (int i=0; i<jaggedDiagonalLength; i++) {
00705       double xval = x[RowPerm[i]];
00706       y[curIndices0[i]] += curValues0[i]*xval;
00707       y[curIndices1[i]] += curValues1[i]*xval;
00708     }
00709   }
00710       }
00711       break;
00712     case 3:
00713       {
00714   const int * curIndices0 = Indices+IndexOffset[j0];
00715   const double * curValues0 = Values+IndexOffset[j0++];
00716   const int * curIndices1 = Indices+IndexOffset[j0];
00717   const double * curValues1 = Values+IndexOffset[j0++];
00718   const int * curIndices2 = Indices+IndexOffset[j0];
00719   const double * curValues2 = Values+IndexOffset[j0];
00720   if (!TransA) {
00721     for (int i=0; i<jaggedDiagonalLength; i++) {
00722       y[RowPerm[i]] += 
00723         curValues0[i]*x[curIndices0[i]] +
00724         curValues1[i]*x[curIndices1[i]] +
00725         curValues2[i]*x[curIndices2[i]];
00726     }
00727   }
00728   else {
00729     for (int i=0; i<jaggedDiagonalLength; i++) {
00730       double xval = x[RowPerm[i]];
00731       y[curIndices0[i]] += curValues0[i]*xval;
00732       y[curIndices1[i]] += curValues1[i]*xval;
00733       y[curIndices2[i]] += curValues2[i]*xval;
00734     }
00735   }
00736       }
00737       break;
00738     case 4:
00739       {
00740   const int * curIndices0 = Indices+IndexOffset[j0];
00741   const double * curValues0 = Values+IndexOffset[j0++];
00742   const int * curIndices1 = Indices+IndexOffset[j0];
00743   const double * curValues1 = Values+IndexOffset[j0++];
00744   const int * curIndices2 = Indices+IndexOffset[j0];
00745   const double * curValues2 = Values+IndexOffset[j0++];
00746   const int * curIndices3 = Indices+IndexOffset[j0];
00747   const double * curValues3 = Values+IndexOffset[j0];
00748   if (!TransA) {
00749     for (int i=0; i<jaggedDiagonalLength; i++) {
00750       y[RowPerm[i]] += 
00751         curValues0[i]*x[curIndices0[i]] +
00752         curValues1[i]*x[curIndices1[i]] +
00753         curValues2[i]*x[curIndices2[i]] +
00754         curValues3[i]*x[curIndices3[i]];
00755     }
00756   }
00757   else {
00758     for (int i=0; i<jaggedDiagonalLength; i++) {
00759       double xval = x[RowPerm[i]];
00760       y[curIndices0[i]] += curValues0[i]*xval;
00761       y[curIndices1[i]] += curValues1[i]*xval;
00762       y[curIndices2[i]] += curValues2[i]*xval;
00763       y[curIndices3[i]] += curValues3[i]*xval;
00764     }
00765   }
00766       }
00767       break;
00768     case 5:
00769       {
00770   const int * curIndices0 = Indices+IndexOffset[j0];
00771   const double * curValues0 = Values+IndexOffset[j0++];
00772   const int * curIndices1 = Indices+IndexOffset[j0];
00773   const double * curValues1 = Values+IndexOffset[j0++];
00774   const int * curIndices2 = Indices+IndexOffset[j0];
00775   const double * curValues2 = Values+IndexOffset[j0++];
00776   const int * curIndices3 = Indices+IndexOffset[j0];
00777   const double * curValues3 = Values+IndexOffset[j0++];
00778   const int * curIndices4 = Indices+IndexOffset[j0];
00779   const double * curValues4 = Values+IndexOffset[j0];
00780   if (!TransA) {
00781     for (int i=0; i<jaggedDiagonalLength; i++) {
00782       y[RowPerm[i]] += 
00783         curValues0[i]*x[curIndices0[i]] +
00784         curValues1[i]*x[curIndices1[i]] +
00785         curValues2[i]*x[curIndices2[i]] +
00786         curValues3[i]*x[curIndices3[i]] +
00787         curValues4[i]*x[curIndices4[i]];
00788     }
00789   }
00790   else {
00791     for (int i=0; i<jaggedDiagonalLength; i++) {
00792       double xval = x[RowPerm[i]];
00793       y[curIndices0[i]] += curValues0[i]*xval;
00794       y[curIndices1[i]] += curValues1[i]*xval;
00795       y[curIndices2[i]] += curValues2[i]*xval;
00796       y[curIndices3[i]] += curValues3[i]*xval;
00797       y[curIndices4[i]] += curValues4[i]*xval;
00798     }
00799   }
00800       }
00801       break;
00802     }
00803   }
00804   return;
00805 }
00806 //=======================================================================================================
00807 void Epetra_JadOperator::Print(ostream& os) const {
00808 
00809   const Epetra_BlockMap * RowMap;
00810   const Epetra_BlockMap * ColMap;
00811   if (Importer_==0) 
00812     ColMap = &(OperatorDomainMap());
00813   else
00814     ColMap = &(Importer_->TargetMap());
00815   if (Exporter_==0) 
00816     RowMap = &(OperatorRangeMap());
00817   else
00818     RowMap = &(Exporter_->SourceMap());
00819 
00820   int MyPID = RowMap->Comm().MyPID();
00821   int NumProc = RowMap->Comm().NumProc();
00822 
00823   for (int iproc=0; iproc < NumProc; iproc++) {
00824     if (MyPID==iproc) {
00825       if (MyPID==0) {
00826   os <<    "Number of Global Nonzeros     = "; os << NumGlobalNonzeros(); os << endl;
00827       }
00828       
00829       os <<  "\nNumber of My Rows               = "; os << NumMyRows_; os << endl;
00830       os <<    "Number of My Jagged Diagonals   = "; os << NumJaggedDiagonals_; os << endl;
00831       os <<    "Number of My Nonzeros           = "; os << NumMyNonzeros_; os << endl; os << endl;
00832 
00833       os << flush;
00834       
00835     }
00836     // Do a few global ops to give I/O a chance to complete
00837     Comm().Barrier();
00838     Comm().Barrier();
00839     Comm().Barrier();
00840   }
00841   
00842   {for (int iproc=0; iproc < NumProc; iproc++) {
00843     if (MyPID==iproc) {
00844       int NumMyRows1 = NumMyRows_;
00845       
00846       if (MyPID==0) {
00847   os.width(8);
00848   os <<  "   Processor ";
00849   os.width(10);
00850   os <<  "   Row Index ";
00851   os.width(10);
00852   os <<  "   Col Index ";
00853   os.width(20);
00854   os <<  "   Value     ";
00855   os << endl;
00856       }
00857       for (int i=0; i<NumMyRows1; i++) {
00858   int Row = RowMap->GID(RowPerm_[i]);; // Get global row number
00859   
00860   for (int j = 0; j < NumJaggedDiagonals_ ; j++) {   
00861     if (IndexOffset_[j+1]-IndexOffset_[j]>i) {
00862       int Index = ColMap->GID(Indices_[IndexOffset_[j]+i]);
00863       double Value = Values_[IndexOffset_[j]+i];
00864       os.width(8);
00865       os <<  MyPID ; os << "    ";  
00866       os.width(10);
00867       os <<  Row ; os << "    ";  
00868       os.width(10);
00869       os <<  Index; os << "    ";
00870       os.width(20);
00871       os <<  Value; os << "    ";
00872       os << endl;
00873     }
00874   }
00875       }
00876                  
00877       os << flush;
00878       
00879     }
00880     // Do a few global ops to give I/O a chance to complete
00881     Comm().Barrier();
00882     Comm().Barrier();
00883     Comm().Barrier();
00884   }}
00885   
00886   return;
00887 }

Generated on Thu Sep 18 12:37:57 2008 for Epetra Package Browser (Single Doxygen Collection) by doxygen 1.3.9.1