Epetra_JadMatrix.cpp

Go to the documentation of this file.
00001 
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //               Epetra: Linear Algebra Services Package 
00006 //                 Copyright (2001) Sandia Corporation
00007 // 
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 // 
00011 // This library is free software; you can redistribute it and/or modify
00012 // it under the terms of the GNU Lesser General Public License as
00013 // published by the Free Software Foundation; either version 2.1 of the
00014 // License, or (at your option) any later version.
00015 //  
00016 // This library is distributed in the hope that it will be useful, but
00017 // WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00019 // Lesser General Public License for more details.
00020 //  
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License along with this library; if not, write to the Free Software
00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00024 // USA
00025 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00026 // 
00027 // ************************************************************************
00028 //@HEADER
00029 
00030 #include "Epetra_JadMatrix.h"
00031 #include "Epetra_Map.h"
00032 #include "Epetra_Import.h"
00033 #include "Epetra_Export.h"
00034 #include "Epetra_Vector.h"
00035 #include "Epetra_MultiVector.h"
00036 #include "Epetra_Comm.h"
00037 #include "Epetra_Util.h"
00038 #include "Epetra_IntSerialDenseVector.h"
00039 #include "Epetra_RowMatrix.h"
00040 #include "Epetra_CrsMatrix.h"
00041 
00042 //==============================================================================
00043 Epetra_JadMatrix::Epetra_JadMatrix(const Epetra_RowMatrix & Matrix) 
00044   : Epetra_BasicRowMatrix(Matrix.RowMatrixRowMap().Comm()),
00045     Values_(0),
00046     Indices_(0),
00047     IndexOffset_(0),
00048     Profile_(0),
00049     RowPerm_(0),
00050     InvRowPerm_(0),
00051     NumJaggedDiagonals_(Matrix.MaxNumEntries())
00052 {
00053   SetMaps(Matrix.RowMatrixRowMap(), Matrix.RowMatrixColMap(), Matrix.OperatorDomainMap(), Matrix.OperatorRangeMap());
00054   if (!Matrix.Filled()) throw Matrix.RowMatrixRowMap().ReportError("Input matrix must have called FillComplete()", -1);
00055   Allocate(Matrix);
00056   SetLabel("Epetra::JadMatrix");
00057 }
00058 
00059 //==============================================================================
00060 Epetra_JadMatrix::~Epetra_JadMatrix(){}
00061 
00062 //==============================================================================
00063 int Epetra_JadMatrix::UpdateValues(const Epetra_RowMatrix & Matrix, bool CheckStructure) {
00064 
00065   int NumEntries;
00066   int * Indices = 0;
00067   double * Values =0;
00068 
00069   int ierr = 0;
00070 
00071   try { // If matrix is an Epetra_CrsMatrix, we can get date much more cheaply
00072 
00073     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00074 
00075     for (int i1=0; i1<NumMyRows_; i1++) {
00076       
00077       EPETRA_CHK_ERR(A.ExtractMyRowView(i1, NumEntries, Values, Indices)); // Get the current row based on the permutation
00078       int i = InvRowPerm_[i1]; // Determine permuted row location
00079       for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00080       if (CheckStructure)
00081   for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
00082     }
00083   }
00084   catch (...) { // Otherwise just live with RowMatrix interface
00085     
00086     Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00087     Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00088     Indices = curIndices.Values();
00089     Values = curValues.Values();
00090     for (int i1=0; i1<NumMyRows_; i1++) {
00091       EPETRA_CHK_ERR(Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices)); // Get current row based on the permutation
00092       int i = InvRowPerm_[i1]; // Determine permuted row location
00093       for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00094       if (CheckStructure)
00095   for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
00096     }
00097   }
00098 
00099   HaveNumericConstants_ = false;
00100   EPETRA_CHK_ERR(ierr);
00101   return(ierr);
00102 }
00103 
00104 //==============================================================================
00105 void Epetra_JadMatrix::Allocate(const Epetra_RowMatrix & Matrix) {
00106 
00107   // Allocate IndexOffset storage
00108   int NumMyRows = Matrix.NumMyRows();
00109   int NumMyNonzeros = Matrix.NumMyNonzeros();
00110 
00111   IndexOffset_.Resize(NumJaggedDiagonals_+1);
00112 
00113   // Next compute permutation of rows
00114   RowPerm_.Resize(NumMyRows);
00115   InvRowPerm_.Resize(NumMyRows);
00116   Profile_.Resize(NumMyRows);
00117   for (int i=0; i<NumMyRows; i++) {
00118     int NumEntries;
00119     Matrix.NumMyRowEntries(i, NumEntries);
00120     Profile_[i] = NumEntries;
00121     RowPerm_[i] = i;
00122   }
00123 
00124   Epetra_Util sorter;
00125   int * RowPerm = RowPerm_.Values();
00126   sorter.Sort(false, NumMyRows, Profile_.Values(), 0, 0, 1, &RowPerm);
00127   //cout << "Profile = " << Profile_ << endl;
00128   //cout << "RowPerm = " << RowPerm_ << endl;
00129   for (int i=0; i<NumMyRows; i++) InvRowPerm_[RowPerm[i]] = i; // Compute inverse row permutation
00130   //cout << "InvRowPerm = " << InvRowPerm_ << endl;
00131 
00132   // Now build IndexOffsets:  These contain the lengths of the jagged diagonals
00133 
00134   for (int i=0; i<NumJaggedDiagonals_; i++) IndexOffset_[i] = 0;
00135 
00136   int curOffset = NumMyRows;
00137   int * curIndex = IndexOffset_.Values(); // point to first index (will be incremented immediately below)
00138   for (int i=1; i<NumJaggedDiagonals_+1; i++) {
00139     curIndex++;
00140     while (*curIndex==0) {
00141       if (Profile_[curOffset-1]<i) curOffset--;
00142       else *curIndex = *(curIndex-1) + curOffset; // Set the length of the current jagged diagonal (plus scan sum)
00143     }
00144   }
00145 
00146   Values_.Resize(NumMyNonzeros);
00147   Indices_.Resize(NumMyNonzeros);
00148 
00149   int NumEntries;
00150   int * Indices = 0;
00151   double * Values =0;
00152 
00153   try { // If matrix is an Epetra_CrsMatrix, we can get data much more cheaply
00154 
00155     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00156 
00157     for (int i1=0; i1<NumMyRows; i1++) {
00158 
00159       A.ExtractMyRowView(i1, NumEntries, Values, Indices); // Get the current row
00160       int i = InvRowPerm_[i1]; // Determine permuted row location
00161       //cout << "i1, i, NumEntries = " << i1 <<" "<< i <<" "<< NumEntries << endl;
00162       for (int j=0; j< NumEntries; j++) {
00163   Values_[IndexOffset_[j]+i] = Values[j];
00164   Indices_[IndexOffset_[j]+i] = Indices[j];
00165       }
00166     }
00167   }
00168   catch (...) { // Otherwise just live with RowMatrix interface
00169 
00170   Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00171   Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00172   Indices = curIndices.Values();
00173   Values = curValues.Values();
00174     for (int i1=0; i1<NumMyRows; i1++) {
00175       Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices); // Get  current row based on the permutation
00176       int i = InvRowPerm_[i1]; // Determine permuted row location
00177       for (int j=0; j< NumEntries; j++) {
00178   Values_[IndexOffset_[j]+i] = Values[j];
00179   Indices_[IndexOffset_[j]+i] = Indices[j];
00180       }
00181     }
00182   }
00183 }
00184 //=============================================================================
00185 int Epetra_JadMatrix::NumMyRowEntries(int MyRow, int & NumEntries) const {
00186   int i = InvRowPerm_[MyRow]; // Determine permuted row location
00187   NumEntries = Profile_[i]; // NNZ in current row
00188   return(0);
00189 }
00190 //=============================================================================
00191 int Epetra_JadMatrix::ExtractMyRowCopy(int MyRow, int Length, int & NumEntries, double *Values, int * Indices) const {
00192 
00193   if(MyRow < 0 || MyRow >= NumMyRows_)
00194     EPETRA_CHK_ERR(-1); // Not in Row range
00195 
00196   int i = InvRowPerm_[MyRow]; // Determine permuted row location
00197   NumEntries = Profile_[i]; // NNZ in current row
00198   if(NumEntries > Length)
00199     EPETRA_CHK_ERR(-2); // Not enough space for copy. Needed size is passed back in NumEntries
00200 
00201   for (int j=0; j< NumEntries; j++) Values[j] = Values_[IndexOffset_[j]+i];
00202   for (int j=0; j< NumEntries; j++) Indices[j] = Indices_[IndexOffset_[j]+i];
00203   return(0);
00204 }
00205 //=============================================================================
00206 int Epetra_JadMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00207   //
00208   // This function forms the product Y = A * Y or Y = A' * X
00209   //
00210 
00211   int NumVectors = X.NumVectors();
00212   if (NumVectors!=Y.NumVectors()) {
00213     EPETRA_CHK_ERR(-1); // Need same number of vectors in each MV
00214   }
00215 
00216   double** Xp = (double**) X.Pointers();
00217   double** Yp = (double**) Y.Pointers();
00218   int LDX = X.ConstantStride() ? X.Stride() : 0;
00219   int LDY = Y.ConstantStride() ? Y.Stride() : 0;
00220   UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
00221   UpdateExportVector(NumVectors);
00222 
00223   if (!TransA) {
00224 
00225     // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00226     if (Importer()!=0) {
00227       EPETRA_CHK_ERR(ImportVector_->Import(X, *Importer(), Insert));
00228       Xp = (double**)ImportVector_->Pointers();
00229       LDX = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00230     }
00231 
00232     // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
00233     if (Exporter()!=0) {
00234       Yp = (double**)ExportVector_->Pointers();
00235       LDY = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00236     }
00237 
00238     // Do actual computation
00239     if (NumVectors==1)
00240       GeneralMV(TransA, *Xp, *Yp);
00241     else
00242       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00243     if (Exporter()!=0) {
00244       Y.PutScalar(0.0);  // Make sure target is zero
00245       Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
00246     }
00247     // Handle case of rangemap being a local replicated map
00248     if (!OperatorRangeMap().DistributedGlobal() && Comm().NumProc()>1) EPETRA_CHK_ERR(Y.Reduce());
00249   }
00250   else { // Transpose operation
00251     
00252 
00253     // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
00254 
00255     if (Exporter()!=0) {
00256       EPETRA_CHK_ERR(ExportVector_->Import(X, *Exporter(), Insert));
00257       Xp = (double**)ExportVector_->Pointers();
00258       LDX = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00259     }
00260 
00261     // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
00262     if (Importer()!=0) {
00263       Yp = (double**)ImportVector_->Pointers();
00264       LDY = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00265     }
00266 
00267     // Do actual computation
00268     if (NumVectors==1)
00269       GeneralMV(TransA, *Xp, *Yp);
00270     else
00271       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00272     if (Importer()!=0) {
00273       Y.PutScalar(0.0);  // Make sure target is zero
00274       EPETRA_CHK_ERR(Y.Export(*ImportVector_, *Importer(), Add)); // Fill Y with Values from export vector
00275     }
00276     // Handle case of rangemap being a local replicated map
00277     if (!OperatorDomainMap().DistributedGlobal() && Comm().NumProc()>1)  EPETRA_CHK_ERR(Y.Reduce());
00278   }
00279 
00280   UpdateFlops(2*NumVectors*NumGlobalNonzeros());
00281   return(0);
00282 }
00283 //=======================================================================================================
00284 void Epetra_JadMatrix::GeneralMM(bool TransA, double ** X, int LDX, double ** Y, int LDY, int NumVectors) const {
00285 
00286   if (LDX==0 || LDY==0 || NumVectors==1) {// Can't unroll RHS if X or Y not strided
00287     for (int k=0; k<NumVectors; k++) GeneralMV(TransA, X[k], Y[k]);
00288   }
00289   else if (NumVectors==2) // Special 2 RHS case (does unrolling in both NumVectors and NumJaggedDiagonals)
00290     GeneralMM2RHS(TransA, X[0], LDX, Y[0], LDY);
00291   // Otherwise unroll RHS only
00292   else
00293     GeneralMM3RHS(TransA, X, LDX, Y, LDY, NumVectors);
00294 
00295   return;
00296 }
00297 //=======================================================================================================
00298 void Epetra_JadMatrix::GeneralMM3RHS(bool TransA, double ** X, int ldx, double ** Y, int ldy, int NumVectors) const {
00299 
00300 #ifdef _CRAY 
00301 #define Pragma(S) _Pragma(S) 
00302 #else 
00303 #define Pragma(S) 
00304 #endif
00305 
00306   // Routine for 3 or more RHS
00307 
00308   const double * Values = Values_.Values();
00309   const int * Indices = Indices_.Values();
00310   const int * IndexOffset = IndexOffset_.Values();
00311   const int * RowPerm = RowPerm_.Values();
00312   for (int j=0; j<NumVectors; j++) {
00313     double * y = Y[j];
00314     if (!TransA)
00315       for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00316     else
00317       for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00318   }
00319 
00320   int nv = NumVectors%5; if (nv==0) nv=5;
00321     double * x = X[0];
00322     double * y = Y[0];
00323  
00324 
00325   for (int k=0; k<NumVectors; k+=5) {
00326     
00327     for (int j=0; j<NumJaggedDiagonals_; j++) {
00328       const int * curIndices = Indices+IndexOffset[j];
00329       const double * curValues = Values+IndexOffset[j];
00330       int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00331       switch (nv){
00332       case 1:
00333   {
00334     if (!TransA) {
00335 Pragma("_CRI ivdep")
00336       for (int i=0; i<jaggedDiagonalLength; i++) {
00337         int ix = curIndices[i];
00338         int iy = RowPerm[i];
00339         double val = curValues[i];
00340         y[iy] += val*x[ix];
00341       }
00342     }
00343     else {
00344 Pragma("_CRI ivdep")
00345       for (int i=0; i<jaggedDiagonalLength; i++) {
00346         int iy = curIndices[i];
00347         int ix = RowPerm[i];
00348         double val = curValues[i];
00349         y[iy] += val*x[ix];
00350       }
00351     }
00352     break;
00353   }
00354       case 2:
00355   {
00356     if (!TransA) {
00357 Pragma("_CRI ivdep")
00358       for (int i=0; i<jaggedDiagonalLength; i++) {
00359         int ix = curIndices[i];
00360         int iy = RowPerm[i];
00361         double val = curValues[i];
00362         y[iy] += val*x[ix];
00363         iy+=ldy; ix+=ldx;
00364         y[iy] += val*x[ix];
00365       }
00366     }
00367     else {
00368 Pragma("_CRI ivdep")
00369       for (int i=0; i<jaggedDiagonalLength; i++) {
00370         int iy = curIndices[i];
00371         int ix = RowPerm[i];
00372         double val = curValues[i];
00373         y[iy] += val*x[ix];
00374         iy+=ldy; ix+=ldx;
00375         y[iy] += val*x[ix];
00376       }
00377     }
00378     break;
00379   }
00380       case 3:
00381   {
00382     if (!TransA) {
00383 Pragma("_CRI ivdep")
00384       for (int i=0; i<jaggedDiagonalLength; i++) {
00385         int ix = curIndices[i];
00386         int iy = RowPerm[i];
00387         double val = curValues[i];
00388         y[iy] += val*x[ix];
00389         iy+=ldy; ix+=ldx;
00390         y[iy] += val*x[ix];
00391         iy+=ldy; ix+=ldx;
00392         y[iy] += val*x[ix];
00393       }
00394     }
00395     else {
00396 Pragma("_CRI ivdep")
00397       for (int i=0; i<jaggedDiagonalLength; i++) {
00398         int iy = curIndices[i];
00399         int ix = RowPerm[i];
00400         double val = curValues[i];
00401         y[iy] += val*x[ix];
00402         iy+=ldy; ix+=ldx;
00403         y[iy] += val*x[ix];
00404         iy+=ldy; ix+=ldx;
00405         y[iy] += val*x[ix];
00406       }
00407     }
00408     break;
00409   }
00410       case 4:
00411   {
00412     if (!TransA) {
00413 Pragma("_CRI ivdep")
00414       for (int i=0; i<jaggedDiagonalLength; i++) {
00415         int ix = curIndices[i];
00416         int iy = RowPerm[i];
00417         double val = curValues[i];
00418         y[iy] += val*x[ix];
00419         iy+=ldy; ix+=ldx;
00420         y[iy] += val*x[ix];
00421         iy+=ldy; ix+=ldx;
00422         y[iy] += val*x[ix];
00423         iy+=ldy; ix+=ldx;
00424         y[iy] += val*x[ix];
00425       }
00426     }
00427     else {
00428 Pragma("_CRI ivdep")
00429       for (int i=0; i<jaggedDiagonalLength; i++) {
00430         int iy = curIndices[i];
00431         int ix = RowPerm[i];
00432         double val = curValues[i];
00433         y[iy] += val*x[ix];
00434         iy+=ldy; ix+=ldx;
00435         y[iy] += val*x[ix];
00436         iy+=ldy; ix+=ldx;
00437         y[iy] += val*x[ix];
00438         iy+=ldy; ix+=ldx;
00439         y[iy] += val*x[ix];
00440       }
00441     }
00442     break;
00443   }
00444       case 5:
00445   {
00446     if (!TransA) {
00447 Pragma("_CRI ivdep")
00448       for (int i=0; i<jaggedDiagonalLength; i++) {
00449         int ix = curIndices[i];
00450         int iy = RowPerm[i];
00451         double val = curValues[i];
00452         y[iy] += val*x[ix];
00453         iy+=ldy; ix+=ldx;
00454         y[iy] += val*x[ix];
00455         iy+=ldy; ix+=ldx;
00456         y[iy] += val*x[ix];
00457         iy+=ldy; ix+=ldx;
00458         y[iy] += val*x[ix];
00459         iy+=ldy; ix+=ldx;
00460         y[iy] += val*x[ix];
00461       }
00462     }
00463     else {
00464 Pragma("_CRI ivdep")
00465       for (int i=0; i<jaggedDiagonalLength; i++) {
00466         int iy = curIndices[i];
00467         int ix = RowPerm[i];
00468         double val = curValues[i];
00469         y[iy] += val*x[ix];
00470         iy+=ldy; ix+=ldx;
00471         y[iy] += val*x[ix];
00472         iy+=ldy; ix+=ldx;
00473         y[iy] += val*x[ix];
00474         iy+=ldy; ix+=ldx;
00475         y[iy] += val*x[ix];
00476         iy+=ldy; ix+=ldx;
00477         y[iy] += val*x[ix];
00478       }
00479     }
00480     break;
00481   }
00482       }
00483     }
00484     x += nv*ldx;
00485     y += nv*ldy;
00486     nv = 5; // After initial remainder, we will always do 5 RHS
00487   }
00488   return;
00489 }
00490 //=======================================================================================================
00491 void Epetra_JadMatrix::GeneralMM2RHS(bool TransA, double * x, int ldx, double * y, int ldy) const {
00492 
00493   // special 2 rhs case
00494 
00495   const double * Values = Values_.Values();
00496   const int * Indices = Indices_.Values();
00497   const int * IndexOffset = IndexOffset_.Values();
00498   const int * RowPerm = RowPerm_.Values();
00499   if (!TransA) 
00500     for (int i=0; i<NumMyRows_; i++) {
00501       y[i] = 0.0;
00502       y[i+ldy] = 0.0;
00503     }
00504   else
00505     for (int i=0; i<NumMyCols_; i++) {
00506       y[i] = 0.0;
00507       y[i+ldy] = 0.0;
00508     }
00509 
00510   int j = 0;
00511   while (j<NumJaggedDiagonals_) {
00512   int j0 = j;
00513   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00514     j++;
00515     // check if other diagonals have same length up to a max of 2
00516     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<2)) j++;
00517     
00518     int numDiags = j-j0;
00519     assert(numDiags<3 && numDiags>0);
00520     assert(j<NumJaggedDiagonals_+1);
00521     
00522     switch (numDiags){
00523     case 1:
00524       {
00525   const int * curIndices = Indices+IndexOffset[j0];
00526   const double * curValues = Values+IndexOffset[j0];
00527   if (!TransA) {
00528 Pragma("_CRI ivdep")
00529     for (int i=0; i<jaggedDiagonalLength; i++) {
00530       int ix = curIndices[i];
00531       int iy = RowPerm[i];
00532       y[iy] += curValues[i]*x[ix];
00533       iy+=ldy; ix+=ldx;
00534       y[iy] += curValues[i]*x[ix];
00535     }
00536   }
00537   else {
00538 Pragma("_CRI ivdep")
00539     for (int i=0; i<jaggedDiagonalLength; i++){
00540       int iy = curIndices[i];
00541       int ix = RowPerm[i];
00542       y[iy] += curValues[i]*x[ix];
00543       iy+=ldy; ix+=ldx;
00544       y[iy] += curValues[i]*x[ix];
00545     }
00546   }
00547   break;
00548       }
00549     case 2:
00550       {
00551   const int * curIndices0 = Indices+IndexOffset[j0];
00552   const double * curValues0 = Values+IndexOffset[j0++];
00553   const int * curIndices1 = Indices+IndexOffset[j0];
00554   const double * curValues1 = Values+IndexOffset[j0];
00555   if (!TransA) {
00556 Pragma("_CRI ivdep")
00557     for (int i=0; i<jaggedDiagonalLength; i++) {
00558       int ix0 = curIndices0[i];
00559       int ix1 = curIndices1[i];
00560       int iy = RowPerm[i];
00561       y[iy] += 
00562         curValues0[i]*x[ix0] +
00563         curValues1[i]*x[ix1];
00564       iy+=ldy; ix0+=ldx; ix1+=ldx;
00565       y[iy] += 
00566         curValues0[i]*x[ix0] +
00567         curValues1[i]*x[ix1];
00568     }
00569   }
00570   else {
00571 Pragma("_CRI ivdep")
00572     for (int i=0; i<jaggedDiagonalLength; i++) {
00573       int iy0 = curIndices0[i];
00574       int iy1 = curIndices1[i];
00575       int ix = RowPerm[i];
00576       double xval = x[ix];
00577       y[iy0] += curValues0[i]*xval;
00578       y[iy1] += curValues1[i]*xval;
00579       ix+=ldx; iy0+=ldy; iy1+=ldy;
00580       xval = x[ix];
00581       y[iy0] += curValues0[i]*xval;
00582       y[iy1] += curValues1[i]*xval;
00583     }
00584   }
00585       }
00586       break;
00587     }
00588   }
00589   return;
00590 }
00591 //=======================================================================================================
00592 void Epetra_JadMatrix::GeneralMV(bool TransA, double * x, double * y)  const {
00593   
00594   const double * Values = Values_.Values();
00595   const int * Indices = Indices_.Values();
00596   const int * IndexOffset = IndexOffset_.Values();
00597   const int * RowPerm = RowPerm_.Values();
00598   if (!TransA)
00599     for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00600   else
00601     for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00602 
00603   int j = 0;
00604   while (j<NumJaggedDiagonals_) {
00605   int j0 = j;
00606   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00607     j++;
00608     // check if other diagonals have same length up to a max of 5
00609     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<5)) j++;
00610     
00611     int numDiags = j-j0;
00612     assert(numDiags<6 && numDiags>0);
00613     assert(j<NumJaggedDiagonals_+1);
00614     
00615     switch (numDiags){
00616     case 1:
00617       {
00618   const int * curIndices = Indices+IndexOffset[j0];
00619   const double * curValues = Values+IndexOffset[j0];
00620   if (!TransA) {
00621 Pragma("_CRI ivdep")
00622     for (int i=0; i<jaggedDiagonalLength; i++)
00623       y[RowPerm[i]] += curValues[i]*x[curIndices[i]];
00624   }
00625   else {
00626 Pragma("_CRI ivdep")
00627     for (int i=0; i<jaggedDiagonalLength; i++)
00628       y[curIndices[i]] += curValues[i]*x[RowPerm[i]];
00629   }
00630   break;
00631       }
00632     case 2:
00633       {
00634   const int * curIndices0 = Indices+IndexOffset[j0];
00635   const double * curValues0 = Values+IndexOffset[j0++];
00636   const int * curIndices1 = Indices+IndexOffset[j0];
00637   const double * curValues1 = Values+IndexOffset[j0];
00638   if (!TransA) {
00639 Pragma("_CRI ivdep")
00640     for (int i=0; i<jaggedDiagonalLength; i++) {
00641       y[RowPerm[i]] += 
00642         curValues0[i]*x[curIndices0[i]] +
00643         curValues1[i]*x[curIndices1[i]];
00644     }
00645   }
00646   else {
00647     //Pragma("_CRI ivdep")  (Collisions possible)
00648     for (int i=0; i<jaggedDiagonalLength; i++) {
00649       double xval = x[RowPerm[i]];
00650       y[curIndices0[i]] += curValues0[i]*xval;
00651       y[curIndices1[i]] += curValues1[i]*xval;
00652     }
00653   }
00654       }
00655       break;
00656     case 3:
00657       {
00658   const int * curIndices0 = Indices+IndexOffset[j0];
00659   const double * curValues0 = Values+IndexOffset[j0++];
00660   const int * curIndices1 = Indices+IndexOffset[j0];
00661   const double * curValues1 = Values+IndexOffset[j0++];
00662   const int * curIndices2 = Indices+IndexOffset[j0];
00663   const double * curValues2 = Values+IndexOffset[j0];
00664   if (!TransA) {
00665 Pragma("_CRI ivdep")
00666     for (int i=0; i<jaggedDiagonalLength; i++) {
00667       y[RowPerm[i]] += 
00668         curValues0[i]*x[curIndices0[i]] +
00669         curValues1[i]*x[curIndices1[i]] +
00670         curValues2[i]*x[curIndices2[i]];
00671     }
00672   }
00673   else {
00674     //Pragma("_CRI ivdep")  (Collisions possible)
00675     for (int i=0; i<jaggedDiagonalLength; i++) {
00676       double xval = x[RowPerm[i]];
00677       y[curIndices0[i]] += curValues0[i]*xval;
00678       y[curIndices1[i]] += curValues1[i]*xval;
00679       y[curIndices2[i]] += curValues2[i]*xval;
00680     }
00681   }
00682       }
00683       break;
00684     case 4:
00685       {
00686   const int * curIndices0 = Indices+IndexOffset[j0];
00687   const double * curValues0 = Values+IndexOffset[j0++];
00688   const int * curIndices1 = Indices+IndexOffset[j0];
00689   const double * curValues1 = Values+IndexOffset[j0++];
00690   const int * curIndices2 = Indices+IndexOffset[j0];
00691   const double * curValues2 = Values+IndexOffset[j0++];
00692   const int * curIndices3 = Indices+IndexOffset[j0];
00693   const double * curValues3 = Values+IndexOffset[j0];
00694   if (!TransA) {
00695 Pragma("_CRI ivdep")
00696     for (int i=0; i<jaggedDiagonalLength; i++) {
00697       y[RowPerm[i]] += 
00698         curValues0[i]*x[curIndices0[i]] +
00699         curValues1[i]*x[curIndices1[i]] +
00700         curValues2[i]*x[curIndices2[i]] +
00701         curValues3[i]*x[curIndices3[i]];
00702     }
00703   }
00704   else {
00705     //Pragma("_CRI ivdep")  (Collisions possible)
00706     for (int i=0; i<jaggedDiagonalLength; i++) {
00707       double xval = x[RowPerm[i]];
00708       y[curIndices0[i]] += curValues0[i]*xval;
00709       y[curIndices1[i]] += curValues1[i]*xval;
00710       y[curIndices2[i]] += curValues2[i]*xval;
00711       y[curIndices3[i]] += curValues3[i]*xval;
00712     }
00713   }
00714       }
00715       break;
00716     case 5:
00717       {
00718   const int * curIndices0 = Indices+IndexOffset[j0];
00719   const double * curValues0 = Values+IndexOffset[j0++];
00720   const int * curIndices1 = Indices+IndexOffset[j0];
00721   const double * curValues1 = Values+IndexOffset[j0++];
00722   const int * curIndices2 = Indices+IndexOffset[j0];
00723   const double * curValues2 = Values+IndexOffset[j0++];
00724   const int * curIndices3 = Indices+IndexOffset[j0];
00725   const double * curValues3 = Values+IndexOffset[j0++];
00726   const int * curIndices4 = Indices+IndexOffset[j0];
00727   const double * curValues4 = Values+IndexOffset[j0];
00728   if (!TransA) {
00729 Pragma("_CRI ivdep")
00730     for (int i=0; i<jaggedDiagonalLength; i++) {
00731       y[RowPerm[i]] += 
00732         curValues0[i]*x[curIndices0[i]] +
00733         curValues1[i]*x[curIndices1[i]] +
00734         curValues2[i]*x[curIndices2[i]] +
00735         curValues3[i]*x[curIndices3[i]] +
00736         curValues4[i]*x[curIndices4[i]];
00737     }
00738   }
00739   else {
00740     // Pragma("_CRI ivdep") (Collisions possible)
00741     for (int i=0; i<jaggedDiagonalLength; i++) {
00742       double xval = x[RowPerm[i]];
00743       y[curIndices0[i]] += curValues0[i]*xval;
00744       y[curIndices1[i]] += curValues1[i]*xval;
00745       y[curIndices2[i]] += curValues2[i]*xval;
00746       y[curIndices3[i]] += curValues3[i]*xval;
00747       y[curIndices4[i]] += curValues4[i]*xval;
00748     }
00749   }
00750       }
00751       break;
00752     }
00753   }
00754   return;
00755 }

Generated on Wed May 12 21:41:05 2010 for Epetra Package Browser (Single Doxygen Collection) by  doxygen 1.4.7