Epetra Package Browser (Single Doxygen Collection) Development
Epetra_JadMatrix.cpp
Go to the documentation of this file.
00001 
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //               Epetra: Linear Algebra Services Package 
00006 //                 Copyright 2011 Sandia Corporation
00007 // 
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00039 // 
00040 // ************************************************************************
00041 //@HEADER
00042 
00043 #include "Epetra_JadMatrix.h"
00044 #include "Epetra_Map.h"
00045 #include "Epetra_Import.h"
00046 #include "Epetra_Export.h"
00047 #include "Epetra_Vector.h"
00048 #include "Epetra_MultiVector.h"
00049 #include "Epetra_Comm.h"
00050 #include "Epetra_Util.h"
00051 #include "Epetra_IntSerialDenseVector.h"
00052 #include "Epetra_RowMatrix.h"
00053 #include "Epetra_CrsMatrix.h"
00054 
00055 //==============================================================================
00056 Epetra_JadMatrix::Epetra_JadMatrix(const Epetra_RowMatrix & Matrix) 
00057   : Epetra_BasicRowMatrix(Matrix.RowMatrixRowMap().Comm()),
00058     Values_(0),
00059     Indices_(0),
00060     IndexOffset_(0),
00061     Profile_(0),
00062     RowPerm_(0),
00063     InvRowPerm_(0),
00064     NumJaggedDiagonals_(Matrix.MaxNumEntries())
00065 {
00066   SetMaps(Matrix.RowMatrixRowMap(), Matrix.RowMatrixColMap(), Matrix.OperatorDomainMap(), Matrix.OperatorRangeMap());
00067   if (!Matrix.Filled()) throw Matrix.RowMatrixRowMap().ReportError("Input matrix must have called FillComplete()", -1);
00068   Allocate(Matrix);
00069   SetLabel("Epetra::JadMatrix");
00070 }
00071 
00072 //==============================================================================
00073 Epetra_JadMatrix::~Epetra_JadMatrix(){}
00074 
00075 //==============================================================================
00076 int Epetra_JadMatrix::UpdateValues(const Epetra_RowMatrix & Matrix, bool CheckStructure) {
00077 
00078   int NumEntries;
00079   int * Indices = 0;
00080   double * Values =0;
00081 
00082   int ierr = 0;
00083 
00084   try { // If matrix is an Epetra_CrsMatrix, we can get date much more cheaply
00085 
00086     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00087 
00088     for (int i1=0; i1<NumMyRows_; i1++) {
00089       
00090       EPETRA_CHK_ERR(A.ExtractMyRowView(i1, NumEntries, Values, Indices)); // Get the current row based on the permutation
00091       int i = InvRowPerm_[i1]; // Determine permuted row location
00092       for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00093       if (CheckStructure)
00094   for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
00095     }
00096   }
00097   catch (...) { // Otherwise just live with RowMatrix interface
00098     
00099     Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00100     Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00101     Indices = curIndices.Values();
00102     Values = curValues.Values();
00103     for (int i1=0; i1<NumMyRows_; i1++) {
00104       EPETRA_CHK_ERR(Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices)); // Get current row based on the permutation
00105       int i = InvRowPerm_[i1]; // Determine permuted row location
00106       for (int j=0; j< NumEntries; j++) Values_[IndexOffset_[j]+i] = Values[j];
00107       if (CheckStructure)
00108   for (int j=0; j< NumEntries; j++) if (Indices_[IndexOffset_[j]+i] != Indices[j]) ierr = - 1;
00109     }
00110   }
00111 
00112   HaveNumericConstants_ = false;
00113   EPETRA_CHK_ERR(ierr);
00114   return(ierr);
00115 }
00116 
00117 //==============================================================================
00118 void Epetra_JadMatrix::Allocate(const Epetra_RowMatrix & Matrix) {
00119 
00120   // Allocate IndexOffset storage
00121   int numMyRows = Matrix.NumMyRows();
00122   int numMyNonzeros = Matrix.NumMyNonzeros();
00123 
00124   IndexOffset_.Resize(NumJaggedDiagonals_+1);
00125 
00126   // Next compute permutation of rows
00127   RowPerm_.Resize(numMyRows);
00128   InvRowPerm_.Resize(numMyRows);
00129   Profile_.Resize(numMyRows);
00130   for (int i=0; i<numMyRows; i++) {
00131     int NumEntries;
00132     Matrix.NumMyRowEntries(i, NumEntries);
00133     Profile_[i] = NumEntries;
00134     RowPerm_[i] = i;
00135   }
00136 
00137   Epetra_Util sorter;
00138   int * RowPerm = RowPerm_.Values();
00139   sorter.Sort(false, numMyRows, Profile_.Values(), 0, 0, 1, &RowPerm, 0, 0);
00140   //cout << "Profile = " << Profile_ << std::endl;
00141   //cout << "RowPerm = " << RowPerm_ << std::endl;
00142   for (int i=0; i<numMyRows; i++) InvRowPerm_[RowPerm[i]] = i; // Compute inverse row permutation
00143   //cout << "InvRowPerm = " << InvRowPerm_ << std::endl;
00144 
00145   // Now build IndexOffsets:  These contain the lengths of the jagged diagonals
00146 
00147   for (int i=0; i<NumJaggedDiagonals_; i++) IndexOffset_[i] = 0;
00148 
00149   int curOffset = numMyRows;
00150   int * curIndex = IndexOffset_.Values(); // point to first index (will be incremented immediately below)
00151   for (int i=1; i<NumJaggedDiagonals_+1; i++) {
00152     curIndex++;
00153     while (*curIndex==0) {
00154       if (Profile_[curOffset-1]<i) curOffset--;
00155       else *curIndex = *(curIndex-1) + curOffset; // Set the length of the current jagged diagonal (plus scan sum)
00156     }
00157   }
00158 
00159   Values_.Resize(numMyNonzeros);
00160   Indices_.Resize(numMyNonzeros);
00161 
00162   int NumEntries;
00163   int * Indices = 0;
00164   double * Values =0;
00165 
00166   try { // If matrix is an Epetra_CrsMatrix, we can get data much more cheaply
00167 
00168     const Epetra_CrsMatrix & A = dynamic_cast<const Epetra_CrsMatrix &>(Matrix);
00169 
00170     for (int i1=0; i1<numMyRows; i1++) {
00171 
00172       A.ExtractMyRowView(i1, NumEntries, Values, Indices); // Get the current row
00173       int i = InvRowPerm_[i1]; // Determine permuted row location
00174       //cout << "i1, i, NumEntries = " << i1 <<" "<< i <<" "<< NumEntries << std::endl;
00175       for (int j=0; j< NumEntries; j++) {
00176   Values_[IndexOffset_[j]+i] = Values[j];
00177   Indices_[IndexOffset_[j]+i] = Indices[j];
00178       }
00179     }
00180   }
00181   catch (...) { // Otherwise just live with RowMatrix interface
00182 
00183   Epetra_SerialDenseVector curValues(NumJaggedDiagonals_);
00184   Epetra_IntSerialDenseVector curIndices(NumJaggedDiagonals_);
00185   Indices = curIndices.Values();
00186   Values = curValues.Values();
00187     for (int i1=0; i1<numMyRows; i1++) {
00188       Matrix.ExtractMyRowCopy(i1, NumJaggedDiagonals_, NumEntries, Values, Indices); // Get  current row based on the permutation
00189       int i = InvRowPerm_[i1]; // Determine permuted row location
00190       for (int j=0; j< NumEntries; j++) {
00191   Values_[IndexOffset_[j]+i] = Values[j];
00192   Indices_[IndexOffset_[j]+i] = Indices[j];
00193       }
00194     }
00195   }
00196 }
00197 //=============================================================================
00198 int Epetra_JadMatrix::NumMyRowEntries(int MyRow, int & NumEntries) const {
00199   int i = InvRowPerm_[MyRow]; // Determine permuted row location
00200   NumEntries = Profile_[i]; // NNZ in current row
00201   return(0);
00202 }
00203 //=============================================================================
00204 int Epetra_JadMatrix::ExtractMyRowCopy(int MyRow, int Length, int & NumEntries, double *Values, int * Indices) const {
00205 
00206   if(MyRow < 0 || MyRow >= NumMyRows_)
00207     EPETRA_CHK_ERR(-1); // Not in Row range
00208 
00209   int i = InvRowPerm_[MyRow]; // Determine permuted row location
00210   NumEntries = Profile_[i]; // NNZ in current row
00211   if(NumEntries > Length)
00212     EPETRA_CHK_ERR(-2); // Not enough space for copy. Needed size is passed back in NumEntries
00213 
00214   for (int j=0; j< NumEntries; j++) Values[j] = Values_[IndexOffset_[j]+i];
00215   for (int j=0; j< NumEntries; j++) Indices[j] = Indices_[IndexOffset_[j]+i];
00216   return(0);
00217 }
00218 //=============================================================================
00219 int Epetra_JadMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const {
00220   //
00221   // This function forms the product Y = A * Y or Y = A' * X
00222   //
00223 
00224   int NumVectors = X.NumVectors();
00225   if (NumVectors!=Y.NumVectors()) {
00226     EPETRA_CHK_ERR(-1); // Need same number of vectors in each MV
00227   }
00228 
00229   double** Xp = (double**) X.Pointers();
00230   double** Yp = (double**) Y.Pointers();
00231   int LDX = X.ConstantStride() ? X.Stride() : 0;
00232   int LDY = Y.ConstantStride() ? Y.Stride() : 0;
00233   UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
00234   UpdateExportVector(NumVectors);
00235 
00236   if (!TransA) {
00237 
00238     // If we have a non-trivial importer, we must import elements that are permuted or are on other processors
00239     if (Importer()!=0) {
00240       EPETRA_CHK_ERR(ImportVector_->Import(X, *Importer(), Insert));
00241       Xp = (double**)ImportVector_->Pointers();
00242       LDX = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00243     }
00244 
00245     // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors
00246     if (Exporter()!=0) {
00247       Yp = (double**)ExportVector_->Pointers();
00248       LDY = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00249     }
00250 
00251     // Do actual computation
00252     if (NumVectors==1)
00253       GeneralMV(TransA, *Xp, *Yp);
00254     else
00255       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00256     if (Exporter()!=0) {
00257       Y.PutScalar(0.0);  // Make sure target is zero
00258       Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector
00259     }
00260     // Handle case of rangemap being a local replicated map
00261     if (!OperatorRangeMap().DistributedGlobal() && Comm().NumProc()>1) EPETRA_CHK_ERR(Y.Reduce());
00262   }
00263   else { // Transpose operation
00264     
00265 
00266     // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
00267 
00268     if (Exporter()!=0) {
00269       EPETRA_CHK_ERR(ExportVector_->Import(X, *Exporter(), Insert));
00270       Xp = (double**)ExportVector_->Pointers();
00271       LDX = ExportVector_->ConstantStride() ? ExportVector_->Stride() : 0;
00272     }
00273 
00274     // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
00275     if (Importer()!=0) {
00276       Yp = (double**)ImportVector_->Pointers();
00277       LDY = ImportVector_->ConstantStride() ? ImportVector_->Stride() : 0;
00278     }
00279 
00280     // Do actual computation
00281     if (NumVectors==1)
00282       GeneralMV(TransA, *Xp, *Yp);
00283     else
00284       GeneralMM(TransA, Xp, LDX, Yp, LDY, NumVectors);
00285     if (Importer()!=0) {
00286       Y.PutScalar(0.0);  // Make sure target is zero
00287       EPETRA_CHK_ERR(Y.Export(*ImportVector_, *Importer(), Add)); // Fill Y with Values from export vector
00288     }
00289     // Handle case of rangemap being a local replicated map
00290     if (!OperatorDomainMap().DistributedGlobal() && Comm().NumProc()>1)  EPETRA_CHK_ERR(Y.Reduce());
00291   }
00292 
00293   UpdateFlops(2*NumVectors*NumGlobalNonzeros64());
00294   return(0);
00295 }
00296 //=======================================================================================================
00297 void Epetra_JadMatrix::GeneralMM(bool TransA, double ** X, int LDX, double ** Y, int LDY, int NumVectors) const {
00298 
00299   if (LDX==0 || LDY==0 || NumVectors==1) {// Can't unroll RHS if X or Y not strided
00300     for (int k=0; k<NumVectors; k++) GeneralMV(TransA, X[k], Y[k]);
00301   }
00302   else if (NumVectors==2) // Special 2 RHS case (does unrolling in both NumVectors and NumJaggedDiagonals)
00303     GeneralMM2RHS(TransA, X[0], LDX, Y[0], LDY);
00304   // Otherwise unroll RHS only
00305   else
00306     GeneralMM3RHS(TransA, X, LDX, Y, LDY, NumVectors);
00307 
00308   return;
00309 }
00310 //=======================================================================================================
00311 void Epetra_JadMatrix::GeneralMM3RHS(bool TransA, double ** X, int ldx, double ** Y, int ldy, int NumVectors) const {
00312 
00313 #ifdef _CRAY 
00314 #define Pragma(S) _Pragma(S) 
00315 #else 
00316 #define Pragma(S) 
00317 #endif
00318 
00319   // Routine for 3 or more RHS
00320 
00321   const double * Values = Values_.Values();
00322   const int * Indices = Indices_.Values();
00323   const int * IndexOffset = IndexOffset_.Values();
00324   const int * RowPerm = RowPerm_.Values();
00325   for (int j=0; j<NumVectors; j++) {
00326     double * y = Y[j];
00327     if (!TransA)
00328       for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00329     else
00330       for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00331   }
00332 
00333   int nv = NumVectors%5; if (nv==0) nv=5;
00334     double * x = X[0];
00335     double * y = Y[0];
00336  
00337 
00338   for (int k=0; k<NumVectors; k+=5) {
00339     
00340     for (int j=0; j<NumJaggedDiagonals_; j++) {
00341       const int * curIndices = Indices+IndexOffset[j];
00342       const double * curValues = Values+IndexOffset[j];
00343       int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00344       switch (nv){
00345       case 1:
00346   {
00347     if (!TransA) {
00348 Pragma("_CRI ivdep")
00349       for (int i=0; i<jaggedDiagonalLength; i++) {
00350         int ix = curIndices[i];
00351         int iy = RowPerm[i];
00352         double val = curValues[i];
00353         y[iy] += val*x[ix];
00354       }
00355     }
00356     else {
00357 Pragma("_CRI ivdep")
00358       for (int i=0; i<jaggedDiagonalLength; i++) {
00359         int iy = curIndices[i];
00360         int ix = RowPerm[i];
00361         double val = curValues[i];
00362         y[iy] += val*x[ix];
00363       }
00364     }
00365     break;
00366   }
00367       case 2:
00368   {
00369     if (!TransA) {
00370 Pragma("_CRI ivdep")
00371       for (int i=0; i<jaggedDiagonalLength; i++) {
00372         int ix = curIndices[i];
00373         int iy = RowPerm[i];
00374         double val = curValues[i];
00375         y[iy] += val*x[ix];
00376         iy+=ldy; ix+=ldx;
00377         y[iy] += val*x[ix];
00378       }
00379     }
00380     else {
00381 Pragma("_CRI ivdep")
00382       for (int i=0; i<jaggedDiagonalLength; i++) {
00383         int iy = curIndices[i];
00384         int ix = RowPerm[i];
00385         double val = curValues[i];
00386         y[iy] += val*x[ix];
00387         iy+=ldy; ix+=ldx;
00388         y[iy] += val*x[ix];
00389       }
00390     }
00391     break;
00392   }
00393       case 3:
00394   {
00395     if (!TransA) {
00396 Pragma("_CRI ivdep")
00397       for (int i=0; i<jaggedDiagonalLength; i++) {
00398         int ix = curIndices[i];
00399         int iy = RowPerm[i];
00400         double val = curValues[i];
00401         y[iy] += val*x[ix];
00402         iy+=ldy; ix+=ldx;
00403         y[iy] += val*x[ix];
00404         iy+=ldy; ix+=ldx;
00405         y[iy] += val*x[ix];
00406       }
00407     }
00408     else {
00409 Pragma("_CRI ivdep")
00410       for (int i=0; i<jaggedDiagonalLength; i++) {
00411         int iy = curIndices[i];
00412         int ix = RowPerm[i];
00413         double val = curValues[i];
00414         y[iy] += val*x[ix];
00415         iy+=ldy; ix+=ldx;
00416         y[iy] += val*x[ix];
00417         iy+=ldy; ix+=ldx;
00418         y[iy] += val*x[ix];
00419       }
00420     }
00421     break;
00422   }
00423       case 4:
00424   {
00425     if (!TransA) {
00426 Pragma("_CRI ivdep")
00427       for (int i=0; i<jaggedDiagonalLength; i++) {
00428         int ix = curIndices[i];
00429         int iy = RowPerm[i];
00430         double val = curValues[i];
00431         y[iy] += val*x[ix];
00432         iy+=ldy; ix+=ldx;
00433         y[iy] += val*x[ix];
00434         iy+=ldy; ix+=ldx;
00435         y[iy] += val*x[ix];
00436         iy+=ldy; ix+=ldx;
00437         y[iy] += val*x[ix];
00438       }
00439     }
00440     else {
00441 Pragma("_CRI ivdep")
00442       for (int i=0; i<jaggedDiagonalLength; i++) {
00443         int iy = curIndices[i];
00444         int ix = RowPerm[i];
00445         double val = curValues[i];
00446         y[iy] += val*x[ix];
00447         iy+=ldy; ix+=ldx;
00448         y[iy] += val*x[ix];
00449         iy+=ldy; ix+=ldx;
00450         y[iy] += val*x[ix];
00451         iy+=ldy; ix+=ldx;
00452         y[iy] += val*x[ix];
00453       }
00454     }
00455     break;
00456   }
00457       case 5:
00458   {
00459     if (!TransA) {
00460 Pragma("_CRI ivdep")
00461       for (int i=0; i<jaggedDiagonalLength; i++) {
00462         int ix = curIndices[i];
00463         int iy = RowPerm[i];
00464         double val = curValues[i];
00465         y[iy] += val*x[ix];
00466         iy+=ldy; ix+=ldx;
00467         y[iy] += val*x[ix];
00468         iy+=ldy; ix+=ldx;
00469         y[iy] += val*x[ix];
00470         iy+=ldy; ix+=ldx;
00471         y[iy] += val*x[ix];
00472         iy+=ldy; ix+=ldx;
00473         y[iy] += val*x[ix];
00474       }
00475     }
00476     else {
00477 Pragma("_CRI ivdep")
00478       for (int i=0; i<jaggedDiagonalLength; i++) {
00479         int iy = curIndices[i];
00480         int ix = RowPerm[i];
00481         double val = curValues[i];
00482         y[iy] += val*x[ix];
00483         iy+=ldy; ix+=ldx;
00484         y[iy] += val*x[ix];
00485         iy+=ldy; ix+=ldx;
00486         y[iy] += val*x[ix];
00487         iy+=ldy; ix+=ldx;
00488         y[iy] += val*x[ix];
00489         iy+=ldy; ix+=ldx;
00490         y[iy] += val*x[ix];
00491       }
00492     }
00493     break;
00494   }
00495       }
00496     }
00497     x += nv*ldx;
00498     y += nv*ldy;
00499     nv = 5; // After initial remainder, we will always do 5 RHS
00500   }
00501   return;
00502 }
00503 //=======================================================================================================
00504 void Epetra_JadMatrix::GeneralMM2RHS(bool TransA, double * x, int ldx, double * y, int ldy) const {
00505 
00506   // special 2 rhs case
00507 
00508   const double * Values = Values_.Values();
00509   const int * Indices = Indices_.Values();
00510   const int * IndexOffset = IndexOffset_.Values();
00511   const int * RowPerm = RowPerm_.Values();
00512   if (!TransA) 
00513     for (int i=0; i<NumMyRows_; i++) {
00514       y[i] = 0.0;
00515       y[i+ldy] = 0.0;
00516     }
00517   else
00518     for (int i=0; i<NumMyCols_; i++) {
00519       y[i] = 0.0;
00520       y[i+ldy] = 0.0;
00521     }
00522 
00523   int j = 0;
00524   while (j<NumJaggedDiagonals_) {
00525   int j0 = j;
00526   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00527     j++;
00528     // check if other diagonals have same length up to a max of 2
00529     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<2)) j++;
00530     
00531     int numDiags = j-j0;
00532     assert(numDiags<3 && numDiags>0);
00533     assert(j<NumJaggedDiagonals_+1);
00534     
00535     switch (numDiags){
00536     case 1:
00537       {
00538   const int * curIndices = Indices+IndexOffset[j0];
00539   const double * curValues = Values+IndexOffset[j0];
00540   if (!TransA) {
00541 Pragma("_CRI ivdep")
00542     for (int i=0; i<jaggedDiagonalLength; i++) {
00543       int ix = curIndices[i];
00544       int iy = RowPerm[i];
00545       y[iy] += curValues[i]*x[ix];
00546       iy+=ldy; ix+=ldx;
00547       y[iy] += curValues[i]*x[ix];
00548     }
00549   }
00550   else {
00551 Pragma("_CRI ivdep")
00552     for (int i=0; i<jaggedDiagonalLength; i++){
00553       int iy = curIndices[i];
00554       int ix = RowPerm[i];
00555       y[iy] += curValues[i]*x[ix];
00556       iy+=ldy; ix+=ldx;
00557       y[iy] += curValues[i]*x[ix];
00558     }
00559   }
00560   break;
00561       }
00562     case 2:
00563       {
00564   const int * curIndices0 = Indices+IndexOffset[j0];
00565   const double * curValues0 = Values+IndexOffset[j0++];
00566   const int * curIndices1 = Indices+IndexOffset[j0];
00567   const double * curValues1 = Values+IndexOffset[j0];
00568   if (!TransA) {
00569 Pragma("_CRI ivdep")
00570     for (int i=0; i<jaggedDiagonalLength; i++) {
00571       int ix0 = curIndices0[i];
00572       int ix1 = curIndices1[i];
00573       int iy = RowPerm[i];
00574       y[iy] += 
00575         curValues0[i]*x[ix0] +
00576         curValues1[i]*x[ix1];
00577       iy+=ldy; ix0+=ldx; ix1+=ldx;
00578       y[iy] += 
00579         curValues0[i]*x[ix0] +
00580         curValues1[i]*x[ix1];
00581     }
00582   }
00583   else {
00584 Pragma("_CRI ivdep")
00585     for (int i=0; i<jaggedDiagonalLength; i++) {
00586       int iy0 = curIndices0[i];
00587       int iy1 = curIndices1[i];
00588       int ix = RowPerm[i];
00589       double xval = x[ix];
00590       y[iy0] += curValues0[i]*xval;
00591       y[iy1] += curValues1[i]*xval;
00592       ix+=ldx; iy0+=ldy; iy1+=ldy;
00593       xval = x[ix];
00594       y[iy0] += curValues0[i]*xval;
00595       y[iy1] += curValues1[i]*xval;
00596     }
00597   }
00598       }
00599       break;
00600     }
00601   }
00602   return;
00603 }
00604 //=======================================================================================================
00605 void Epetra_JadMatrix::GeneralMV(bool TransA, double * x, double * y)  const {
00606   
00607   const double * Values = Values_.Values();
00608   const int * Indices = Indices_.Values();
00609   const int * IndexOffset = IndexOffset_.Values();
00610   const int * RowPerm = RowPerm_.Values();
00611   if (!TransA)
00612     for (int i=0; i<NumMyRows_; i++) y[i] = 0.0;
00613   else
00614     for (int i=0; i<NumMyCols_; i++) y[i] = 0.0;
00615 
00616   int j = 0;
00617   while (j<NumJaggedDiagonals_) {
00618   int j0 = j;
00619   int jaggedDiagonalLength = IndexOffset[j+1]-IndexOffset[j];
00620     j++;
00621     // check if other diagonals have same length up to a max of 5
00622     while ((j<NumJaggedDiagonals_-1) && (IndexOffset[j+1]-IndexOffset[j]==jaggedDiagonalLength) && (j-j0<5)) j++;
00623     
00624     int numDiags = j-j0;
00625     assert(numDiags<6 && numDiags>0);
00626     assert(j<NumJaggedDiagonals_+1);
00627     
00628     switch (numDiags){
00629     case 1:
00630       {
00631   const int * curIndices = Indices+IndexOffset[j0];
00632   const double * curValues = Values+IndexOffset[j0];
00633   if (!TransA) {
00634 Pragma("_CRI ivdep")
00635     for (int i=0; i<jaggedDiagonalLength; i++)
00636       y[RowPerm[i]] += curValues[i]*x[curIndices[i]];
00637   }
00638   else {
00639 Pragma("_CRI ivdep")
00640     for (int i=0; i<jaggedDiagonalLength; i++)
00641       y[curIndices[i]] += curValues[i]*x[RowPerm[i]];
00642   }
00643   break;
00644       }
00645     case 2:
00646       {
00647   const int * curIndices0 = Indices+IndexOffset[j0];
00648   const double * curValues0 = Values+IndexOffset[j0++];
00649   const int * curIndices1 = Indices+IndexOffset[j0];
00650   const double * curValues1 = Values+IndexOffset[j0];
00651   if (!TransA) {
00652 Pragma("_CRI ivdep")
00653     for (int i=0; i<jaggedDiagonalLength; i++) {
00654       y[RowPerm[i]] += 
00655         curValues0[i]*x[curIndices0[i]] +
00656         curValues1[i]*x[curIndices1[i]];
00657     }
00658   }
00659   else {
00660     //Pragma("_CRI ivdep")  (Collisions possible)
00661     for (int i=0; i<jaggedDiagonalLength; i++) {
00662       double xval = x[RowPerm[i]];
00663       y[curIndices0[i]] += curValues0[i]*xval;
00664       y[curIndices1[i]] += curValues1[i]*xval;
00665     }
00666   }
00667       }
00668       break;
00669     case 3:
00670       {
00671   const int * curIndices0 = Indices+IndexOffset[j0];
00672   const double * curValues0 = Values+IndexOffset[j0++];
00673   const int * curIndices1 = Indices+IndexOffset[j0];
00674   const double * curValues1 = Values+IndexOffset[j0++];
00675   const int * curIndices2 = Indices+IndexOffset[j0];
00676   const double * curValues2 = Values+IndexOffset[j0];
00677   if (!TransA) {
00678 Pragma("_CRI ivdep")
00679     for (int i=0; i<jaggedDiagonalLength; i++) {
00680       y[RowPerm[i]] += 
00681         curValues0[i]*x[curIndices0[i]] +
00682         curValues1[i]*x[curIndices1[i]] +
00683         curValues2[i]*x[curIndices2[i]];
00684     }
00685   }
00686   else {
00687     //Pragma("_CRI ivdep")  (Collisions possible)
00688     for (int i=0; i<jaggedDiagonalLength; i++) {
00689       double xval = x[RowPerm[i]];
00690       y[curIndices0[i]] += curValues0[i]*xval;
00691       y[curIndices1[i]] += curValues1[i]*xval;
00692       y[curIndices2[i]] += curValues2[i]*xval;
00693     }
00694   }
00695       }
00696       break;
00697     case 4:
00698       {
00699   const int * curIndices0 = Indices+IndexOffset[j0];
00700   const double * curValues0 = Values+IndexOffset[j0++];
00701   const int * curIndices1 = Indices+IndexOffset[j0];
00702   const double * curValues1 = Values+IndexOffset[j0++];
00703   const int * curIndices2 = Indices+IndexOffset[j0];
00704   const double * curValues2 = Values+IndexOffset[j0++];
00705   const int * curIndices3 = Indices+IndexOffset[j0];
00706   const double * curValues3 = Values+IndexOffset[j0];
00707   if (!TransA) {
00708 Pragma("_CRI ivdep")
00709     for (int i=0; i<jaggedDiagonalLength; i++) {
00710       y[RowPerm[i]] += 
00711         curValues0[i]*x[curIndices0[i]] +
00712         curValues1[i]*x[curIndices1[i]] +
00713         curValues2[i]*x[curIndices2[i]] +
00714         curValues3[i]*x[curIndices3[i]];
00715     }
00716   }
00717   else {
00718     //Pragma("_CRI ivdep")  (Collisions possible)
00719     for (int i=0; i<jaggedDiagonalLength; i++) {
00720       double xval = x[RowPerm[i]];
00721       y[curIndices0[i]] += curValues0[i]*xval;
00722       y[curIndices1[i]] += curValues1[i]*xval;
00723       y[curIndices2[i]] += curValues2[i]*xval;
00724       y[curIndices3[i]] += curValues3[i]*xval;
00725     }
00726   }
00727       }
00728       break;
00729     case 5:
00730       {
00731   const int * curIndices0 = Indices+IndexOffset[j0];
00732   const double * curValues0 = Values+IndexOffset[j0++];
00733   const int * curIndices1 = Indices+IndexOffset[j0];
00734   const double * curValues1 = Values+IndexOffset[j0++];
00735   const int * curIndices2 = Indices+IndexOffset[j0];
00736   const double * curValues2 = Values+IndexOffset[j0++];
00737   const int * curIndices3 = Indices+IndexOffset[j0];
00738   const double * curValues3 = Values+IndexOffset[j0++];
00739   const int * curIndices4 = Indices+IndexOffset[j0];
00740   const double * curValues4 = Values+IndexOffset[j0];
00741   if (!TransA) {
00742 Pragma("_CRI ivdep")
00743     for (int i=0; i<jaggedDiagonalLength; i++) {
00744       y[RowPerm[i]] += 
00745         curValues0[i]*x[curIndices0[i]] +
00746         curValues1[i]*x[curIndices1[i]] +
00747         curValues2[i]*x[curIndices2[i]] +
00748         curValues3[i]*x[curIndices3[i]] +
00749         curValues4[i]*x[curIndices4[i]];
00750     }
00751   }
00752   else {
00753     // Pragma("_CRI ivdep") (Collisions possible)
00754     for (int i=0; i<jaggedDiagonalLength; i++) {
00755       double xval = x[RowPerm[i]];
00756       y[curIndices0[i]] += curValues0[i]*xval;
00757       y[curIndices1[i]] += curValues1[i]*xval;
00758       y[curIndices2[i]] += curValues2[i]*xval;
00759       y[curIndices3[i]] += curValues3[i]*xval;
00760       y[curIndices4[i]] += curValues4[i]*xval;
00761     }
00762   }
00763       }
00764       break;
00765     }
00766   }
00767   return;
00768 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines