Epetra Package Browser (Single Doxygen Collection) Development
Epetra_MultiVector.cpp
Go to the documentation of this file.
00001 
00002 //@HEADER
00003 // ************************************************************************
00004 // 
00005 //               Epetra: Linear Algebra Services Package 
00006 //                 Copyright 2001 Sandia Corporation
00007 // 
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 //
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00039 // 
00040 // ************************************************************************
00041 //@HEADER
00042 
00043 #include "Epetra_MultiVector.h"
00044 #include "Epetra_Vector.h"
00045 #include "Epetra_Comm.h"
00046 #ifdef EPETRA_MPI
00047 #include "Epetra_MpiComm.h"
00048 #endif
00049 #include "Epetra_BlockMap.h"
00050 #include "Epetra_Map.h"
00051 #include "Epetra_Import.h"
00052 #include "Epetra_Export.h"
00053 #include "Epetra_Distributor.h"
00054 
00055 //=============================================================================
00056 
00057 // Epetra_BlockMap Constructor
00058 
00059 Epetra_MultiVector::Epetra_MultiVector(const Epetra_BlockMap& Map, int NumVectors, bool zeroOut)
00060   : Epetra_DistObject(Map, "Epetra::MultiVector"),
00061     Epetra_CompObject(),
00062     Values_(0),
00063     Pointers_(0),
00064     MyLength_(Map.NumMyPoints()),
00065     GlobalLength_(Map.NumGlobalPoints()),
00066     NumVectors_(NumVectors),
00067     UserAllocated_(false),
00068     ConstantStride_(true),
00069     Stride_(Map.NumMyPoints()),
00070     Allocated_(false)
00071 {
00072   Util_.SetSeed(1);
00073 
00074     AllocateForCopy();
00075     
00076     for (int i = 0; i< NumVectors_; i++) Pointers_[i] = Values_+i*Stride_;
00077 
00078   if(zeroOut) PutScalar(0.0); // Fill all vectors with zero.
00079 }
00080 //==========================================================================
00081 
00082 // Copy Constructor
00083 
00084 Epetra_MultiVector::Epetra_MultiVector(const Epetra_MultiVector& Source)
00085   : Epetra_DistObject(Source),
00086     Epetra_CompObject(Source),
00087     Values_(0),
00088     Pointers_(0),
00089     MyLength_(Source.MyLength_),
00090     GlobalLength_(Source.GlobalLength_),
00091     NumVectors_(Source.NumVectors_),
00092     UserAllocated_(false),
00093     ConstantStride_(true),
00094     Stride_(0),
00095     Allocated_(false),
00096     Util_(Source.Util_)
00097 {
00098   AllocateForCopy();
00099   
00100   double ** Source_Pointers = Source.Pointers();
00101   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = Source_Pointers[i];
00102   
00103   DoCopy();
00104   
00105 }
00106 //==========================================================================
00107 
00108 // This constructor copies in or makes view of a standard Fortran array
00109 
00110 Epetra_MultiVector::Epetra_MultiVector(Epetra_DataAccess CV, const Epetra_BlockMap& Map, 
00111                double *A, int MyLDA, int NumVectors)
00112   : Epetra_DistObject(Map, "Epetra::MultiVector"),
00113     Epetra_CompObject(),
00114     Values_(0),
00115     Pointers_(0),
00116     MyLength_(Map.NumMyPoints()),
00117     GlobalLength_(Map.NumGlobalPoints()),
00118     NumVectors_(NumVectors),
00119     UserAllocated_(false),
00120     ConstantStride_(true),
00121     Stride_(Map.NumMyPoints()),
00122     Allocated_(false)
00123 {
00124   Util_.SetSeed(1);  
00125 
00126   if (CV==Copy) AllocateForCopy();
00127   else AllocateForView();
00128 
00129   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = A + i*MyLDA;
00130   
00131    if (CV==Copy) DoCopy();
00132    else DoView();
00133   
00134 }
00135 
00136 //==========================================================================
00137 
00138 // This constructor copies in or makes view of a C/C++ array of pointer
00139 
00140 Epetra_MultiVector::Epetra_MultiVector(Epetra_DataAccess CV, const Epetra_BlockMap& Map, 
00141                 double **ArrayOfPointers, int NumVectors)
00142   : Epetra_DistObject(Map, "Epetra::MultiVector"),
00143     Epetra_CompObject(),
00144     Values_(0),
00145     Pointers_(0),
00146     MyLength_(Map.NumMyPoints()),
00147     GlobalLength_(Map.NumGlobalPoints()),
00148     NumVectors_(NumVectors),
00149     UserAllocated_(false),
00150     ConstantStride_(true),
00151     Stride_(Map.NumMyPoints()),
00152     Allocated_(false)
00153 {
00154   Util_.SetSeed(1);
00155 
00156   if (CV==Copy) AllocateForCopy();
00157   else AllocateForView();
00158   
00159   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = ArrayOfPointers[i];
00160   
00161    if (CV==Copy) DoCopy();
00162    else DoView();
00163   
00164 }
00165 
00166 //==========================================================================
00167 
00168 // This constructor copies or makes view of selected vectors, specified in Indices, 
00169 // from an existing MultiVector
00170 
00171 Epetra_MultiVector::Epetra_MultiVector(Epetra_DataAccess CV, const Epetra_MultiVector& Source, 
00172                int *Indices, int NumVectors)
00173   : Epetra_DistObject(Source.Map(), "Epetra::MultiVector"),
00174     Epetra_CompObject(),
00175     Values_(0),
00176     Pointers_(0),
00177     MyLength_(Source.MyLength_),
00178     GlobalLength_(Source.GlobalLength_),
00179     NumVectors_(NumVectors),
00180     UserAllocated_(false),
00181     ConstantStride_(true),
00182     Stride_(0),
00183     Allocated_(false)
00184 {
00185   Util_.SetSeed(1);
00186 
00187   if (CV==Copy) AllocateForCopy();
00188   else AllocateForView();
00189 
00190   double ** Source_Pointers = Source.Pointers();  
00191   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = Source_Pointers[Indices[i]];
00192   
00193    if (CV==Copy) DoCopy();
00194    else DoView();
00195   
00196 }
00197 
00198 //==========================================================================
00199 
00200 // This interface copies or makes view of a range of vectors from an existing MultiVector
00201 
00202 Epetra_MultiVector::Epetra_MultiVector(Epetra_DataAccess CV, const Epetra_MultiVector& Source, 
00203                int StartIndex, int NumVectors)
00204   : Epetra_DistObject(Source.Map(), "Epetra::MultiVector"),
00205     Epetra_CompObject(),
00206     Values_(0),
00207     Pointers_(0),
00208     MyLength_(Source.MyLength_),
00209     GlobalLength_(Source.GlobalLength_),
00210     NumVectors_(NumVectors),
00211     UserAllocated_(false),
00212     ConstantStride_(true),
00213     Stride_(0),
00214     Allocated_(false)
00215 {
00216   Util_.SetSeed(1);  
00217 
00218   if (CV==Copy) AllocateForCopy();
00219   else AllocateForView();
00220 
00221   double ** Source_Pointers = Source.Pointers();
00222   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = Source_Pointers[StartIndex+i];
00223 
00224    if (CV==Copy) DoCopy();
00225    else DoView();
00226 }
00227 
00228 //=========================================================================
00229 Epetra_MultiVector::~Epetra_MultiVector(){
00230 
00231   if (!Allocated_) return;
00232 
00233   delete [] Pointers_;
00234   if (!UserAllocated_ && Values_!=0) delete [] Values_;
00235 
00236   if (Vectors_!=0) {
00237     for (int i=0; i<NumVectors_; i++) if (Vectors_[i]!=0) delete Vectors_[i];
00238     delete [] Vectors_;
00239   }
00240 
00241 
00242   if (DoubleTemp_!=0) delete [] DoubleTemp_;
00243 
00244 }
00245 
00246 //=========================================================================
00247 int Epetra_MultiVector::AllocateForCopy(void)
00248 {
00249   
00250   if (Allocated_) return(0);
00251     
00252   if (NumVectors_<=0) 
00253     throw ReportError("Number of Vectors = " + toString(NumVectors_) + ", but must be greater than zero", -1);
00254 
00255   Stride_ = Map_.NumMyPoints();
00256   if (Stride_>0) Values_ = new double[Stride_ * NumVectors_];
00257   Pointers_ = new double *[NumVectors_];
00258 
00259   DoubleTemp_ = 0;
00260   Vectors_ = 0;
00261   
00262   int randval = rand(); // Use POSIX standard random function
00263   if (DistributedGlobal_)
00264     Util_.SetSeed(2*Comm_->MyPID() + randval);
00265   else {
00266     int locrandval = randval;
00267     Comm_->MaxAll(&locrandval, &randval, 1);
00268     Util_.SetSeed(randval); // Replicated Local MultiVectors must have same seeds
00269   }
00270 
00271   Allocated_ = true;
00272   UserAllocated_ = false;
00273   return(0);
00274 }
00275 
00276 //=========================================================================
00277 int Epetra_MultiVector::DoCopy(void)
00278 {
00279   // On entry Pointers_ contains pointers to the incoming vectors.  These
00280   // pointers are the only unique piece of information for each of the 
00281   // constructors.
00282 
00283   // \interal { Optimization of this function can impact performance since it 
00284   //           involves a fair amount of memory traffic.}
00285 
00286   // On exit, Pointers_ is redefined to point to its own MultiVector vectors.
00287 
00288   for (int i = 0; i< NumVectors_; i++)
00289     {
00290       double * from = Pointers_[i];
00291       double * to = Values_+i*Stride_;
00292       Pointers_[i] = to;
00293       const int MyLength = MyLength_;
00294 #ifdef Epetra_HAVE_OMP
00295 #pragma omp parallel for default(none) shared(to,from)
00296 #endif
00297       for (int j=0; j<MyLength; j++) to[j] = from[j];
00298     }
00299 
00300   return(0);
00301 }
00302 //=========================================================================
00303 int Epetra_MultiVector::AllocateForView(void)
00304 {
00305   
00306   if (NumVectors_<=0) 
00307     throw ReportError("Number of Vectors = " + toString(NumVectors_) + ", but must be greater than zero", -1);
00308  
00309   Pointers_ = new double *[NumVectors_];
00310   
00311   DoubleTemp_ = 0;
00312   Vectors_ = 0;
00313   
00314   int randval = rand(); // Use POSIX standard random function
00315   if (DistributedGlobal_)
00316     Util_.SetSeed(2*Comm_->MyPID() + randval);
00317   else {
00318     int locrandval = randval;
00319     Comm_->MaxAll(&locrandval, &randval, 1);
00320     Util_.SetSeed(randval); // Replicated Local MultiVectors must have same seeds
00321   }
00322 
00323   Allocated_ = true;
00324   UserAllocated_ = true;
00325   
00326   return(0);
00327 }
00328 
00329 //=========================================================================
00330 int Epetra_MultiVector::DoView(void)
00331 {
00332   // On entry Pointers_ contains pointers to the incoming vectors.  These
00333   // pointers are the only unique piece of information for each of the 
00334   // constructors.
00335 
00336 
00337   Values_ = Pointers_[0];
00338 
00339   if (NumVectors_==1) {
00340     Stride_ = Map_.NumMyPoints();
00341     ConstantStride_ = true;
00342     return(0);
00343   }
00344 
00345   // Remainder of code checks if MultiVector has regular stride
00346 
00347   Stride_ = (int)(Pointers_[1] - Pointers_[0]);
00348   ConstantStride_ = false;
00349 
00350   for (int i = 1; i < NumVectors_-1; i++) {
00351     if (Pointers_[i+1] - Pointers_[i] != Stride_) return(0);
00352   }
00353 
00354   ConstantStride_ = true;
00355 
00356   return(0);
00357 }
00358 //=========================================================================
00359 int Epetra_MultiVector::ReplaceGlobalValue(int GlobalRow, int VectorIndex, double ScalarValue) {
00360 
00361  // Use the more general method below
00362   EPETRA_CHK_ERR(ChangeGlobalValue(GlobalRow, 0, VectorIndex, ScalarValue, false));
00363   return(0);
00364 }
00365 //=========================================================================
00366 int Epetra_MultiVector::ReplaceGlobalValue(int GlobalBlockRow, int BlockRowOffset, 
00367              int VectorIndex, double ScalarValue) {
00368   // Use the more general method below
00369   EPETRA_CHK_ERR(ChangeGlobalValue(GlobalBlockRow, BlockRowOffset, VectorIndex, ScalarValue, false)); 
00370   return(0);
00371 }
00372 //=========================================================================
00373 int Epetra_MultiVector::SumIntoGlobalValue(int GlobalRow, int VectorIndex, double ScalarValue) {
00374 
00375   // Use the more general method below
00376   EPETRA_CHK_ERR(ChangeGlobalValue(GlobalRow, 0, VectorIndex, ScalarValue, true)); 
00377   return(0);
00378 }
00379 //=========================================================================
00380 int Epetra_MultiVector::SumIntoGlobalValue(int GlobalBlockRow, int BlockRowOffset, 
00381              int VectorIndex, double ScalarValue) {
00382   // Use the more general method below
00383   EPETRA_CHK_ERR(ChangeGlobalValue(GlobalBlockRow, BlockRowOffset, VectorIndex, ScalarValue, true));
00384   return(0);
00385 }
00386 //=========================================================================
00387 int Epetra_MultiVector::ReplaceMyValue(int MyRow, int VectorIndex, double ScalarValue) {
00388 
00389   // Use the more general method below
00390   EPETRA_CHK_ERR(ChangeMyValue(MyRow, 0, VectorIndex, ScalarValue, false)); 
00391   return(0);
00392 }
00393 //=========================================================================
00394 int Epetra_MultiVector::ReplaceMyValue(int MyBlockRow, int BlockRowOffset, 
00395              int VectorIndex, double ScalarValue) {
00396   // Use the more general method below
00397   EPETRA_CHK_ERR(ChangeMyValue(MyBlockRow, BlockRowOffset, VectorIndex, ScalarValue, false)); 
00398   return(0);
00399 }
00400 //=========================================================================
00401 int Epetra_MultiVector::SumIntoMyValue(int MyRow, int VectorIndex, double ScalarValue) {
00402   // Use the more general method below
00403   EPETRA_CHK_ERR(ChangeMyValue(MyRow, 0, VectorIndex, ScalarValue, true)); 
00404   return(0);
00405 }
00406 //=========================================================================
00407 int Epetra_MultiVector::SumIntoMyValue(int MyBlockRow, int BlockRowOffset, 
00408              int VectorIndex, double ScalarValue) {
00409   // Use the more general method below
00410   EPETRA_CHK_ERR(ChangeMyValue(MyBlockRow, BlockRowOffset, VectorIndex, ScalarValue, true));
00411   return(0);
00412 }
00413 //=========================================================================
00414 int Epetra_MultiVector::ChangeGlobalValue(int GlobalBlockRow, int BlockRowOffset, 
00415              int VectorIndex, double ScalarValue, bool SumInto) {
00416 
00417   // Convert GID to LID and call LID version
00418   EPETRA_CHK_ERR(ChangeMyValue(Map().LID(GlobalBlockRow), BlockRowOffset, VectorIndex, ScalarValue, SumInto));
00419   return(0);
00420 }
00421 //=========================================================================
00422 int Epetra_MultiVector::ChangeMyValue(int MyBlockRow, int BlockRowOffset, 
00423              int VectorIndex, double ScalarValue, bool SumInto) {
00424   
00425   if (!Map().MyLID(MyBlockRow)) EPETRA_CHK_ERR(1); // I don't own this one, return a warning flag
00426   if (VectorIndex>= NumVectors_) EPETRA_CHK_ERR(-1); // Consider this a real error
00427   if (BlockRowOffset<0 || BlockRowOffset>=Map().ElementSize(MyBlockRow)) EPETRA_CHK_ERR(-2); // Offset is out-of-range
00428 
00429   int entry = Map().FirstPointInElement(MyBlockRow);
00430 
00431   if (SumInto)
00432     Pointers_[VectorIndex][entry+BlockRowOffset] += ScalarValue;
00433   else
00434     Pointers_[VectorIndex][entry+BlockRowOffset] = ScalarValue;
00435 
00436   return(0);
00437 }
00438 //=========================================================================
00439 int Epetra_MultiVector::Random() {
00440   // Generate random numbers drawn from a uniform distribution on
00441   // the interval (-1,1) using a multiplicative congruential generator
00442   // with modulus 2^31 - 1.
00443   /*  
00444   const double a = 16807.0, BigInt=2147483647.0, DbleOne=1.0, DbleTwo=2.0;
00445   
00446   for(int i=0; i < NumVectors_; i++)
00447     for (int j=0; j<MyLength_; j++){
00448       Seed_ = fmod( a*Seed_, BigInt );
00449       Pointers_[i][j] = DbleTwo*(Seed_/BigInt)-DbleOne;
00450     }
00451   */
00452 
00453    const int MyLength = MyLength_;
00454   for(int i = 0; i < NumVectors_; i++) {
00455           double * const to = Pointers_[i];
00456 #ifdef Epetra_HAVE_OMP
00457 #pragma omp parallel for default(none)
00458 #endif
00459     for(int j = 0; j < MyLength; j++)
00460       to[j] = Util_.RandomDouble();
00461         }
00462 
00463   return(0);
00464 }
00465  
00466 //=========================================================================
00467 
00468 // Extract a copy of a Epetra_MultiVector.  Put in a user's Fortran-style array
00469 
00470 int Epetra_MultiVector::ExtractCopy(double *A, int MyLDA) const {
00471   if (NumVectors_>1 && Stride_ > MyLDA) EPETRA_CHK_ERR(-1); // LDA not big enough
00472   
00473   const int MyLength = MyLength_;
00474   for (int i=0; i< NumVectors_; i++)
00475     {
00476       double * from = Pointers_[i];
00477       double * to = A + i*MyLDA;
00478       for (int j=0; j<MyLength; j++) *to++ = *from++;
00479     }
00480 
00481   return(0);
00482 }
00483 
00484 //=========================================================================
00485 int Epetra_MultiVector::ReplaceMap(const Epetra_BlockMap& map)
00486 {
00487   if (Map().PointSameAs(map)) {
00488     Epetra_DistObject::Map_ = map;
00489     return(0);
00490   }
00491 
00492   return(-1);  
00493 }
00494 
00495 //=========================================================================
00496 
00497 // Extract a copy of a Epetra_MultiVector.  Put in a user's array of pointers
00498 
00499 int Epetra_MultiVector::ExtractCopy(double **ArrayOfPointers) const {
00500   const int MyLength = MyLength_;
00501   for (int i=0; i< NumVectors_; i++)
00502     {
00503       double * from = Pointers_[i];
00504       double * to = ArrayOfPointers[i];
00505       for (int j=0; j<MyLength; j++) *to++ = *from++;
00506     }
00507   
00508   return(0);
00509 }
00510       
00511       
00512 
00513 //=========================================================================
00514 
00515 // Extract a view of a Epetra_MultiVector.  Set up a user's Fortran-style array
00516 
00517 int Epetra_MultiVector::ExtractView(double **A, int *MyLDA) const {
00518   if (!ConstantStride_) EPETRA_CHK_ERR(-1);  // Can't make a Fortran-style view if not constant stride
00519   *MyLDA = Stride_; // Set user's LDA
00520   *A = Values_; // Set user's value pointer
00521   return(0);
00522 }
00523       
00524       
00525 
00526 //=========================================================================
00527 
00528 // Extract a view of a Epetra_MultiVector.  Put in a user's array of pointers
00529 
00530 int Epetra_MultiVector::ExtractView(double ***ArrayOfPointers) const {
00531   *ArrayOfPointers = Pointers_;
00532   
00533   return(0);
00534 }
00535       
00536       
00537 //=========================================================================
00538 int Epetra_MultiVector::PutScalar(double ScalarConstant) {
00539 
00540   // Fills MultiVector with the value ScalarConstant **/
00541 
00542   const int MyLength = MyLength_;
00543   for (int i = 0; i < NumVectors_; i++) {
00544     double * const to = Pointers_[i];
00545 #ifdef Epetra_HAVE_OMP
00546 #pragma omp parallel for default(none) shared(ScalarConstant)
00547 #endif
00548     for (int j=0; j<MyLength; j++) to[j] = ScalarConstant;
00549   }
00550   return(0);
00551 }
00552 //=========================================================================
00553 int Epetra_MultiVector::CheckSizes(const Epetra_SrcDistObject& Source) {
00554 
00555   const Epetra_MultiVector & A = dynamic_cast<const Epetra_MultiVector &>(Source);
00556 
00557   if (NumVectors()!=A.NumVectors()) {EPETRA_CHK_ERR(-3)};
00558   return(0);
00559 }
00560 
00561 //=========================================================================
00562 int Epetra_MultiVector::CopyAndPermute(const Epetra_SrcDistObject& Source,
00563                                        int NumSameIDs, 
00564                                        int NumPermuteIDs,
00565                                        int * PermuteToLIDs, 
00566                                        int *PermuteFromLIDs,
00567                                        const Epetra_OffsetIndex * Indexor)
00568 {
00569   (void)Indexor;
00570 
00571   const Epetra_MultiVector & A = dynamic_cast<const Epetra_MultiVector &>(Source);
00572 
00573   double **From = A.Pointers();
00574   double **To = Pointers_;
00575   int NumVectors = NumVectors_;
00576 
00577   int * ToFirstPointInElementList = 0;
00578   int * FromFirstPointInElementList = 0;
00579   int * FromElementSizeList = 0;
00580   int MaxElementSize = Map().MaxElementSize();
00581   bool ConstantElementSize = Map().ConstantElementSize();
00582 
00583   if (!ConstantElementSize) {
00584     ToFirstPointInElementList =   Map().FirstPointInElementList();
00585     FromFirstPointInElementList = A.Map().FirstPointInElementList();
00586     FromElementSizeList = A.Map().ElementSizeList();
00587   }
00588   int jj, jjj, k;
00589   
00590   int NumSameEntries;
00591 
00592   bool Case1 = false;
00593   bool Case2 = false;
00594   // bool Case3 = false;
00595 
00596   if (MaxElementSize==1) {
00597     Case1 = true;
00598     NumSameEntries = NumSameIDs;
00599   }
00600   else if (ConstantElementSize) {
00601     Case2 = true;
00602     NumSameEntries = NumSameIDs * MaxElementSize;
00603   }
00604   else {
00605     // Case3 = true;
00606     NumSameEntries = FromFirstPointInElementList[NumSameIDs];
00607   }
00608 
00609   // Short circuit for the case where the source and target vector is the same.
00610   if (To==From) NumSameEntries = 0;
00611   
00612   // Do copy first
00613   if (NumSameIDs>0)
00614     if (To!=From) {
00615       for (int i=0; i < NumVectors; i++)
00616   for (int j=0; j<NumSameEntries; j++)
00617     To[i][j] = From[i][j];
00618     }
00619   // Do local permutation next
00620   if (NumPermuteIDs>0) {
00621   
00622     // Point entry case
00623     if (Case1) {
00624       
00625       if (NumVectors==1)
00626   for (int j=0; j<NumPermuteIDs; j++) 
00627     To[0][PermuteToLIDs[j]] = From[0][PermuteFromLIDs[j]];
00628       
00629       else {
00630   for (int j=0; j<NumPermuteIDs; j++) {
00631     jj = PermuteToLIDs[j];
00632     jjj = PermuteFromLIDs[j];
00633     for (int i=0; i<NumVectors; i++)
00634       To[i][jj] = From[i][jjj];
00635   }
00636       }
00637     }
00638     // constant element size case
00639     else if (Case2) {
00640       
00641       for (int j=0; j<NumPermuteIDs; j++) {
00642   jj = MaxElementSize*PermuteToLIDs[j];
00643   jjj = MaxElementSize*PermuteFromLIDs[j];
00644   for (int i=0; i<NumVectors; i++)
00645     for (k=0; k<MaxElementSize; k++)
00646       To[i][jj+k] = From[i][jjj+k];
00647       }
00648     }
00649     
00650     // variable element size case
00651     else {
00652       
00653       for (int j=0; j<NumPermuteIDs; j++) {
00654   jj = ToFirstPointInElementList[PermuteToLIDs[j]];
00655   jjj = FromFirstPointInElementList[PermuteFromLIDs[j]];
00656   int ElementSize = FromElementSizeList[PermuteFromLIDs[j]];
00657   for (int i=0; i<NumVectors; i++)
00658     for (k=0; k<ElementSize; k++)
00659       To[i][jj+k] = From[i][jjj+k];
00660       }
00661     }
00662   }
00663   return(0);
00664 }
00665 
00666 //=========================================================================
00667 int Epetra_MultiVector::PackAndPrepare(const Epetra_SrcDistObject & Source,
00668                                        int NumExportIDs,
00669                                        int * ExportLIDs,
00670                                        int & LenExports,
00671                                        char * & Exports,
00672                                        int & SizeOfPacket,
00673                                        int * Sizes,
00674                                        bool & VarSizes,
00675                                        Epetra_Distributor & Distor)
00676 {
00677   (void)Sizes;
00678   (void)VarSizes;
00679   (void)Distor;
00680 
00681   const Epetra_MultiVector & A = dynamic_cast<const Epetra_MultiVector &>(Source);
00682   int jj, k;
00683 
00684   double **From = A.Pointers();
00685   int MaxElementSize = Map().MaxElementSize();
00686   int NumVectors = NumVectors_;
00687   bool ConstantElementSize = Map().ConstantElementSize();
00688 
00689   int * FromFirstPointInElementList = 0;
00690   int * FromElementSizeList = 0;
00691 
00692   if (!ConstantElementSize) {
00693     FromFirstPointInElementList = A.Map().FirstPointInElementList();
00694     FromElementSizeList = A.Map().ElementSizeList();
00695   }
00696 
00697   double * DoubleExports = 0;
00698 
00699   SizeOfPacket = NumVectors*MaxElementSize*(int)sizeof(double);
00700 
00701   if(SizeOfPacket*NumExportIDs>LenExports) {
00702     if (LenExports>0) delete [] Exports;
00703     LenExports = SizeOfPacket*NumExportIDs;
00704     DoubleExports = new double[NumVectors*MaxElementSize*NumExportIDs];
00705     Exports = (char *) DoubleExports;
00706   }
00707 
00708   double * ptr;
00709 
00710   if (NumExportIDs>0) {
00711     ptr = (double *) Exports;
00712     
00713     // Point entry case
00714     if (MaxElementSize==1) {
00715       
00716       if (NumVectors==1)
00717         for (int j=0; j<NumExportIDs; j++)
00718           *ptr++ = From[0][ExportLIDs[j]];
00719 
00720       else {
00721   for (int j=0; j<NumExportIDs; j++) {
00722     jj = ExportLIDs[j];
00723     for (int i=0; i<NumVectors; i++)
00724       *ptr++ = From[i][jj];
00725   }
00726       }
00727     }
00728 
00729     // constant element size case
00730     else if (ConstantElementSize) {
00731       
00732       for (int j=0; j<NumExportIDs; j++) {
00733   jj = MaxElementSize*ExportLIDs[j];
00734   for (int i=0; i<NumVectors; i++)
00735     for (k=0; k<MaxElementSize; k++)
00736       *ptr++ = From[i][jj+k];
00737       }
00738     }
00739     
00740     // variable element size case
00741     else {
00742       
00743       int thisSizeOfPacket = NumVectors*MaxElementSize;
00744       for (int j=0; j<NumExportIDs; j++) {
00745   ptr = (double *) Exports + j*thisSizeOfPacket;
00746   jj = FromFirstPointInElementList[ExportLIDs[j]];
00747   int ElementSize = FromElementSizeList[ExportLIDs[j]];
00748   for (int i=0; i<NumVectors; i++)
00749     for (k=0; k<ElementSize; k++)
00750       *ptr++ = From[i][jj+k];
00751       }
00752     }
00753   }
00754 
00755   return(0);
00756 }
00757 
00758 //=========================================================================
00759 int Epetra_MultiVector::UnpackAndCombine(const Epetra_SrcDistObject & Source,
00760                                          int NumImportIDs,
00761                                          int * ImportLIDs, 
00762                                          int LenImports, 
00763                                          char * Imports,
00764                                          int & SizeOfPacket, 
00765                                          Epetra_Distributor & Distor, 
00766                                          Epetra_CombineMode CombineMode,
00767                                          const Epetra_OffsetIndex * Indexor )
00768 {
00769   (void)Source;
00770   (void)LenImports;
00771   (void)SizeOfPacket;
00772   (void)Distor;
00773   (void)Indexor;
00774   int jj, k;
00775   
00776   if(    CombineMode != Add
00777       && CombineMode != Zero
00778       && CombineMode != Insert
00779       && CombineMode != InsertAdd
00780       && CombineMode != Average
00781       && CombineMode != AbsMax )
00782     EPETRA_CHK_ERR(-1); //Unsupported CombinedMode, will default to Zero
00783 
00784   if (NumImportIDs<=0) return(0);
00785 
00786   double ** To = Pointers_;
00787   int NumVectors = NumVectors_;
00788   int MaxElementSize = Map().MaxElementSize();
00789   bool ConstantElementSize = Map().ConstantElementSize();
00790 
00791   int * ToFirstPointInElementList = 0;
00792   int * ToElementSizeList = 0;
00793 
00794   if (!ConstantElementSize) {
00795     ToFirstPointInElementList = Map().FirstPointInElementList();
00796     ToElementSizeList = Map().ElementSizeList();
00797   }
00798   
00799   double * ptr;
00800   // Unpack it...
00801 
00802   ptr = (double *) Imports;
00803     
00804   // Point entry case
00805   if (MaxElementSize==1) {
00806       
00807     if (NumVectors==1) {
00808       if (CombineMode==Add)
00809   for (int j=0; j<NumImportIDs; j++) To[0][ImportLIDs[j]] += *ptr++; // Add to existing value
00810       else if(CombineMode==Insert)
00811   for (int j=0; j<NumImportIDs; j++) To[0][ImportLIDs[j]] = *ptr++;
00812       else if(CombineMode==InsertAdd) {
00813   for (int j=0; j<NumImportIDs; j++) To[0][ImportLIDs[j]] = 0.0;
00814   for (int j=0; j<NumImportIDs; j++) To[0][ImportLIDs[j]] += *ptr++;
00815       }
00816       else if(CombineMode==AbsMax)
00817         for (int j=0; j<NumImportIDs; j++) {
00818     To[0][ImportLIDs[j]] = EPETRA_MAX( To[0][ImportLIDs[j]],std::abs(*ptr));
00819     ptr++;
00820   }
00821       // Note:  The following form of averaging is not a true average if more that one value is combined.
00822       //        This might be an issue in the future, but we leave this way for now.
00823       else if(CombineMode==Average)
00824   for (int j=0; j<NumImportIDs; j++) {To[0][ImportLIDs[j]] += *ptr++; To[0][ImportLIDs[j]] *= 0.5;}
00825     }
00826 
00827     else {  // NumVectors>1
00828 
00829       if (CombineMode==Add) {
00830   for (int j=0; j<NumImportIDs; j++) {
00831     jj = ImportLIDs[j];
00832     for (int i=0; i<NumVectors; i++)
00833       To[i][jj] += *ptr++; // Add to existing value
00834   }
00835       }
00836       else if(CombineMode==Insert) {
00837   for (int j=0; j<NumImportIDs; j++) {
00838     jj = ImportLIDs[j];
00839     for (int i=0; i<NumVectors; i++)
00840       To[i][jj] = *ptr++;
00841   }
00842       }
00843       else if(CombineMode==InsertAdd) {
00844   for (int j=0; j<NumImportIDs; j++) {
00845     jj = ImportLIDs[j];
00846     for (int i=0; i<NumVectors; i++)
00847       To[i][jj] = 0.0;
00848   }
00849   for (int j=0; j<NumImportIDs; j++) {
00850     jj = ImportLIDs[j];
00851     for (int i=0; i<NumVectors; i++)
00852       To[i][jj] += *ptr++;
00853   }
00854       }
00855       else if(CombineMode==AbsMax) {
00856         for (int j=0; j<NumImportIDs; j++) {
00857           jj = ImportLIDs[j];
00858           for (int i=0; i<NumVectors; i++) {
00859             To[i][jj] = EPETRA_MAX( To[i][jj], std::abs(*ptr) );
00860       ptr++;
00861     }
00862         }
00863       }
00864       // Note:  The following form of averaging is not a true average if more that one value is combined.
00865       //        This might be an issue in the future, but we leave this way for now.
00866       else if(CombineMode==Average) {
00867   for (int j=0; j<NumImportIDs; j++) {
00868     jj = ImportLIDs[j];
00869     for (int i=0; i<NumVectors; i++)
00870       { To[i][jj] += *ptr++;  To[i][jj] *= 0.5;}
00871   }
00872       }
00873     }
00874   }
00875 
00876   // constant element size case
00877 
00878   else if (ConstantElementSize) {
00879    
00880     if (CombineMode==Add) {
00881       for (int j=0; j<NumImportIDs; j++) {
00882   jj = MaxElementSize*ImportLIDs[j];
00883   for (int i=0; i<NumVectors; i++)
00884     for (k=0; k<MaxElementSize; k++)
00885       To[i][jj+k] += *ptr++; // Add to existing value
00886       }
00887     }
00888     else if(CombineMode==Insert) {
00889       for (int j=0; j<NumImportIDs; j++) {
00890   jj = MaxElementSize*ImportLIDs[j];
00891   for (int i=0; i<NumVectors; i++)
00892     for (k=0; k<MaxElementSize; k++)
00893       To[i][jj+k] = *ptr++;
00894       }
00895     }
00896     else if(CombineMode==InsertAdd) {
00897       for (int j=0; j<NumImportIDs; j++) {
00898   jj = MaxElementSize*ImportLIDs[j];
00899   for (int i=0; i<NumVectors; i++)
00900     for (k=0; k<MaxElementSize; k++)
00901       To[i][jj+k] = 0.0;
00902       }
00903       for (int j=0; j<NumImportIDs; j++) {
00904   jj = MaxElementSize*ImportLIDs[j];
00905   for (int i=0; i<NumVectors; i++)
00906     for (k=0; k<MaxElementSize; k++)
00907       To[i][jj+k] += *ptr++;
00908       }
00909     }
00910     else if(CombineMode==AbsMax) {
00911       for (int j=0; j<NumImportIDs; j++) {
00912   jj = MaxElementSize*ImportLIDs[j];
00913   for (int i=0; i<NumVectors; i++)
00914     for (k=0; k<MaxElementSize; k++) {
00915       To[i][jj+k] = EPETRA_MAX( To[i][jj+k], std::abs(*ptr) );
00916       ptr++;
00917     }
00918       }
00919     }
00920     // Note:  The following form of averaging is not a true average if more that one value is combined.
00921     //        This might be an issue in the future, but we leave this way for now.
00922     else if(CombineMode==Average) {
00923       for (int j=0; j<NumImportIDs; j++) {
00924   jj = MaxElementSize*ImportLIDs[j];
00925   for (int i=0; i<NumVectors; i++)
00926     for (k=0; k<MaxElementSize; k++)
00927       { To[i][jj+k] += *ptr++; To[i][jj+k] *= 0.5;}
00928       }
00929     }
00930   }
00931     
00932   // variable element size case
00933 
00934   else {
00935     int thisSizeOfPacket = NumVectors*MaxElementSize;
00936 
00937     if (CombineMode==Add) {
00938       for (int j=0; j<NumImportIDs; j++) {
00939   ptr = (double *) Imports + j*thisSizeOfPacket;
00940   jj = ToFirstPointInElementList[ImportLIDs[j]];
00941   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00942   for (int i=0; i<NumVectors; i++)
00943     for (k=0; k<ElementSize; k++)
00944       To[i][jj+k] += *ptr++; // Add to existing value
00945       }
00946     }
00947     else  if(CombineMode==Insert){
00948       for (int j=0; j<NumImportIDs; j++) {
00949   ptr = (double *) Imports + j*thisSizeOfPacket;
00950   jj = ToFirstPointInElementList[ImportLIDs[j]];
00951   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00952   for (int i=0; i<NumVectors; i++)
00953     for (k=0; k<ElementSize; k++)
00954       To[i][jj+k] = *ptr++;
00955       }
00956     }
00957     else  if(CombineMode==InsertAdd){
00958       for (int j=0; j<NumImportIDs; j++) {
00959   ptr = (double *) Imports + j*thisSizeOfPacket;
00960   jj = ToFirstPointInElementList[ImportLIDs[j]];
00961   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00962   for (int i=0; i<NumVectors; i++)
00963     for (k=0; k<ElementSize; k++)
00964       To[i][jj+k] = 0.0;
00965       }
00966       for (int j=0; j<NumImportIDs; j++) {
00967   ptr = (double *) Imports + j*thisSizeOfPacket;
00968   jj = ToFirstPointInElementList[ImportLIDs[j]];
00969   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00970   for (int i=0; i<NumVectors; i++)
00971     for (k=0; k<ElementSize; k++)
00972       To[i][jj+k] += *ptr++;
00973       }
00974     }
00975     else  if(CombineMode==AbsMax){
00976       for (int j=0; j<NumImportIDs; j++) {
00977   ptr = (double *) Imports + j*thisSizeOfPacket;
00978   jj = ToFirstPointInElementList[ImportLIDs[j]];
00979   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00980   for (int i=0; i<NumVectors; i++)
00981     for (k=0; k<ElementSize; k++) {
00982       To[i][jj+k] = EPETRA_MAX( To[i][jj+k], std::abs(*ptr));
00983       ptr++;
00984     }
00985       }
00986     }
00987     // Note:  The following form of averaging is not a true average if more that one value is combined.
00988     //        This might be an issue in the future, but we leave this way for now.
00989     else if(CombineMode==Average) {
00990       for (int j=0; j<NumImportIDs; j++) {
00991   ptr = (double *) Imports + j*thisSizeOfPacket;
00992   jj = ToFirstPointInElementList[ImportLIDs[j]];
00993   int ElementSize = ToElementSizeList[ImportLIDs[j]];
00994   for (int i=0; i<NumVectors; i++)
00995     for (k=0; k<ElementSize; k++)
00996       { To[i][jj+k] += *ptr++; To[i][jj+k] *= 0.5;}
00997       }
00998     }
00999   }
01000 
01001   return(0);
01002 }
01003 
01004 //=========================================================================
01005 int Epetra_MultiVector::Dot(const Epetra_MultiVector& A, double *Result) const {
01006 
01007   // Dot product of two MultiVectors 
01008 
01009   if (NumVectors_ != A.NumVectors()) EPETRA_CHK_ERR(-1);
01010   const int MyLength = MyLength_;
01011   if (MyLength != A.MyLength()) EPETRA_CHK_ERR(-2);
01012   UpdateDoubleTemp();
01013     
01014   double **A_Pointers = A.Pointers();
01015 
01016 #ifdef Epetra_HAVE_OMP
01017   for (int i=0; i < NumVectors_; i++) 
01018     {
01019       const double * const from = Pointers_[i];
01020       const double * const fromA = A_Pointers[i];
01021       double sum = 0.0;
01022 #pragma omp parallel for reduction (+:sum) default(none)
01023       for (int j=0; j < MyLength; j++) sum += from[j] * fromA[j];
01024       DoubleTemp_[i] = sum;
01025     }
01026 #else
01027   for (int i=0; i < NumVectors_; i++) DoubleTemp_[i] = DOT(MyLength, Pointers_[i], A_Pointers[i]);
01028 #endif
01029 
01030   if (DistributedGlobal())
01031     Comm_->SumAll(DoubleTemp_, Result, NumVectors_);
01032   else
01033     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01034   
01035   UpdateFlops(2*GlobalLength_*NumVectors_);
01036 
01037   return(0);
01038 }
01039 //=========================================================================
01040 int Epetra_MultiVector::Abs(const Epetra_MultiVector& A) {
01041 
01042   // this[i][j] = std::abs(A[i][j])
01043 
01044   const int MyLength = MyLength_;
01045   if (NumVectors_ != A.NumVectors()) EPETRA_CHK_ERR(-1);
01046   if (MyLength != A.MyLength()) EPETRA_CHK_ERR(-2);
01047 
01048   double **A_Pointers = A.Pointers();
01049 
01050   for (int i=0; i < NumVectors_; i++) {
01051     double * const to = Pointers_[i];
01052     const double * const from = A_Pointers[i];
01053 #ifdef Epetra_HAVE_OMP
01054 #pragma omp parallel for default(none)
01055 #endif
01056     for (int j=0; j < MyLength; j++) to[j] = std::abs(from[j]);
01057   }
01058 
01059   return(0);
01060 }
01061 //=========================================================================
01062 int Epetra_MultiVector::Reciprocal(const Epetra_MultiVector& A) {
01063 
01064   // this[i][j] = 1.0/(A[i][j])
01065 
01066   int ierr = 0;
01067 #ifndef Epetra_HAVE_OMP
01068   int localierr = 0;
01069 #endif
01070   const int MyLength = MyLength_;
01071   if (NumVectors_ != A.NumVectors()) EPETRA_CHK_ERR(-1);
01072   if (MyLength != A.MyLength()) EPETRA_CHK_ERR(-2);
01073 
01074   double **A_Pointers = A.Pointers();
01075 
01076   for (int i=0; i < NumVectors_; i++) {
01077     double * const to = Pointers_[i];
01078     const double * const from = A_Pointers[i];
01079 #ifdef Epetra_HAVE_OMP
01080 #pragma omp parallel default(none) shared(ierr)
01081 {
01082     int localierr = 0;
01083 #pragma omp for
01084 #endif
01085     for (int j=0; j < MyLength; j++) {
01086       double value = from[j];
01087       // Set error to 1 to signal that zero rowsum found (supercedes ierr = 2)     
01088       if (std::abs(value)<Epetra_MinDouble) {
01089         if (value==0.0) localierr = 1;
01090         else if (localierr!=1) localierr = 2;
01091         to[j] = EPETRA_SGN(value) * Epetra_MaxDouble;
01092       }
01093       else
01094         to[j] = 1.0/value;
01095     }
01096 #ifdef Epetra_HAVE_OMP
01097 #pragma omp critical 
01098 #endif
01099 {
01100    if (localierr==1) ierr = 1;
01101    else if (localierr==2 && ierr!=1) ierr = 2;
01102 }
01103 #ifdef Epetra_HAVE_OMP
01104 }
01105 #endif
01106   }
01107   EPETRA_CHK_ERR(ierr);
01108   return(0);
01109 }
01110   //=========================================================================
01111   int Epetra_MultiVector::Scale (double ScalarValue) {
01112 
01113     // scales a MultiVector in place by a scalar
01114   
01115 
01116   const int MyLength = MyLength_;
01117 #ifdef Epetra_HAVE_OMP
01118     for (int i = 0; i < NumVectors_; i++) {
01119       double * const to = Pointers_[i];
01120 #pragma omp parallel for default(none) shared(ScalarValue)
01121       for (int j = 0; j < MyLength; j++) to[j] = ScalarValue * to[j];
01122     }
01123 #else
01124     for (int i = 0; i < NumVectors_; i++)
01125       SCAL(MyLength, ScalarValue, Pointers_[i]);
01126 #endif
01127     UpdateFlops(GlobalLength_*NumVectors_);
01128 
01129     return(0);
01130   }
01131 
01132   //=========================================================================
01133   int Epetra_MultiVector::Scale (double ScalarA, const Epetra_MultiVector& A) {
01134 
01135     // scales a MultiVector by a scalar and put in the this:
01136     // this = ScalarA * A
01137 
01138   const int MyLength = MyLength_;
01139   if (NumVectors_ != A.NumVectors()) EPETRA_CHK_ERR(-1);
01140   if (MyLength != A.MyLength()) EPETRA_CHK_ERR(-2);
01141 
01142     double **A_Pointers = (double**)A.Pointers();
01143     
01144     for (int i = 0; i < NumVectors_; i++) {
01145       double * const to = Pointers_[i];
01146       const double * const from = A_Pointers[i];
01147 #ifdef Epetra_HAVE_OMP
01148 #pragma omp parallel for default(none) shared(ScalarA)
01149 #endif
01150       for (int j = 0; j < MyLength; j++) to[j] = ScalarA * from[j];
01151     }
01152     UpdateFlops(GlobalLength_*NumVectors_);
01153 
01154     return(0);
01155   }
01156 
01157   //=========================================================================
01158   int Epetra_MultiVector::Update(double ScalarA, const Epetra_MultiVector& A, double ScalarThis) {
01159 
01160 
01161     // linear combination of two MultiVectors: this = ScalarThis * this + ScalarA * A
01162 
01163   const int MyLength = MyLength_;
01164   if (NumVectors_ != A.NumVectors()) EPETRA_CHK_ERR(-1);
01165   if (MyLength != A.MyLength()) EPETRA_CHK_ERR(-2);
01166 
01167     double **A_Pointers = (double**)A.Pointers();
01168 
01169     if (ScalarThis==0.0)
01170       {
01171   for (int i = 0; i < NumVectors_; i++) {
01172           double * const to = Pointers_[i];
01173           const double * const from = A_Pointers[i];
01174 #ifdef Epetra_HAVE_OMP
01175 #pragma omp parallel for default(none) shared(ScalarA)
01176 #endif
01177     for (int j = 0; j < MyLength; j++) to[j] = ScalarA * from[j];
01178         }
01179   UpdateFlops(GlobalLength_*NumVectors_);
01180       }
01181     else if (ScalarThis==1.0)
01182       {
01183   for (int i = 0; i < NumVectors_; i++) {
01184           double * const to = Pointers_[i];
01185           const double * const from = A_Pointers[i];
01186 #ifdef Epetra_HAVE_OMP
01187 #pragma omp parallel for default(none) shared(ScalarA)
01188 #endif
01189     for (int j = 0; j < MyLength; j++) to[j] = to[j] + ScalarA * from[j];
01190         }
01191   UpdateFlops(2*GlobalLength_*NumVectors_);
01192       }
01193     else if (ScalarA==1.0)
01194       {
01195   for (int i = 0; i < NumVectors_; i++) {
01196           double * const to = Pointers_[i];
01197           const double * const from = A_Pointers[i];
01198 #ifdef Epetra_HAVE_OMP
01199 #pragma omp parallel for default(none) shared(ScalarThis)
01200 #endif
01201     for (int j = 0; j < MyLength; j++) to[j] = ScalarThis * to[j] + from[j];
01202         }
01203   UpdateFlops(2*GlobalLength_*NumVectors_);
01204       }
01205     else
01206       {
01207   for (int i = 0; i < NumVectors_; i++) {
01208           double * const to = Pointers_[i];
01209           const double * const from = A_Pointers[i];
01210 #ifdef Epetra_HAVE_OMP
01211 #pragma omp parallel for default(none) shared(ScalarA,ScalarThis)
01212 #endif
01213     for (int j = 0; j < MyLength; j++) to[j] = ScalarThis * to[j] +
01214                               ScalarA *  from[j];
01215         }
01216   UpdateFlops(3*GlobalLength_*NumVectors_);
01217       }
01218 
01219     return(0);
01220   }
01221 
01222 //=========================================================================
01223 int Epetra_MultiVector::Update(double ScalarA, const Epetra_MultiVector& A, 
01224           double ScalarB, const Epetra_MultiVector& B, double ScalarThis) {
01225   
01226   
01227   // linear combination of three MultiVectors: 
01228   // this = ScalarThis * this + ScalarA * A + ScalarB * B
01229   
01230   if (ScalarA==0.0) {
01231     EPETRA_CHK_ERR(Update(ScalarB, B, ScalarThis));
01232     return(0);
01233   }
01234   if (ScalarB==0.0) {
01235     EPETRA_CHK_ERR(Update(ScalarA, A, ScalarThis));
01236     return(0);
01237   }
01238          
01239   const int MyLength = MyLength_;
01240   if (NumVectors_ != A.NumVectors() || NumVectors_ != B.NumVectors()) EPETRA_CHK_ERR(-1);
01241   if (MyLength != A.MyLength() || MyLength != B.MyLength()) EPETRA_CHK_ERR(-2);
01242   
01243     double **A_Pointers = (double**)A.Pointers();
01244     double **B_Pointers = (double**)B.Pointers();
01245 
01246     if (ScalarThis==0.0)
01247       {
01248   if (ScalarA==1.0)
01249     {
01250       for (int i = 0; i < NumVectors_; i++) {
01251               double * const to = Pointers_[i];
01252               const double * const fromA = A_Pointers[i];
01253               const double * const fromB = B_Pointers[i];
01254 #ifdef Epetra_HAVE_OMP
01255 #pragma omp parallel for default(none) shared(ScalarB)
01256 #endif
01257         for (int j = 0; j < MyLength; j++) to[j] =           fromA[j] + 
01258                             ScalarB * fromB[j];
01259             }
01260       UpdateFlops(2*GlobalLength_*NumVectors_);
01261     }
01262   else if (ScalarB==1.0)
01263     {
01264       for (int i = 0; i < NumVectors_; i++) {
01265               double * const to = Pointers_[i];
01266               const double * const fromA = A_Pointers[i];
01267               const double * const fromB = B_Pointers[i];
01268 #ifdef Epetra_HAVE_OMP
01269 #pragma omp parallel for default(none) shared(ScalarA)
01270 #endif
01271         for (int j = 0; j < MyLength; j++) to[j] = ScalarA * fromA[j] +
01272                                       fromB[j];
01273             }
01274       UpdateFlops(2*GlobalLength_*NumVectors_);
01275     }
01276   else
01277     {
01278       for (int i = 0; i < NumVectors_; i++) {
01279               double * const to = Pointers_[i];
01280               const double * const fromA = A_Pointers[i];
01281               const double * const fromB = B_Pointers[i];
01282 #ifdef Epetra_HAVE_OMP
01283 #pragma omp parallel for default(none) shared(ScalarA,ScalarB)
01284 #endif
01285         for (int j = 0; j < MyLength; j++) to[j] = ScalarA * fromA[j] + 
01286                             ScalarB * fromB[j];
01287             }
01288       UpdateFlops(3*GlobalLength_*NumVectors_);
01289     }
01290       }
01291     else if (ScalarThis==1.0)
01292       {
01293   if (ScalarA==1.0)
01294     {
01295       for (int i = 0; i < NumVectors_; i++) {
01296               double * const to = Pointers_[i];
01297               const double * const fromA = A_Pointers[i];
01298               const double * const fromB = B_Pointers[i];
01299 #ifdef Epetra_HAVE_OMP
01300 #pragma omp parallel for default(none) shared(ScalarB)
01301 #endif
01302         for (int j = 0; j < MyLength; j++) to[j] +=           fromA[j] + 
01303                              ScalarB * fromB[j];
01304             }
01305       UpdateFlops(3*GlobalLength_*NumVectors_);
01306     }
01307   else if (ScalarB==1.0)
01308     {
01309       for (int i = 0; i < NumVectors_; i++) {
01310               double * const to = Pointers_[i];
01311               const double * const fromA = A_Pointers[i];
01312               const double * const fromB = B_Pointers[i];
01313 #ifdef Epetra_HAVE_OMP
01314 #pragma omp parallel for default(none) shared(ScalarA)
01315 #endif
01316         for (int j = 0; j < MyLength; j++) to[j] += ScalarA * fromA[j] +
01317                                        fromB[j];
01318             }
01319       UpdateFlops(3*GlobalLength_*NumVectors_);
01320     }
01321   else
01322     {
01323       for (int i = 0; i < NumVectors_; i++) {
01324               double * const to = Pointers_[i];
01325               const double * const fromA = A_Pointers[i];
01326               const double * const fromB = B_Pointers[i];
01327 #ifdef Epetra_HAVE_OMP
01328 #pragma omp parallel for default(none) shared(ScalarA,ScalarB)
01329 #endif
01330         for (int j = 0; j < MyLength; j++) to[j] += ScalarA * fromA[j] + 
01331                              ScalarB * fromB[j];
01332             }
01333       UpdateFlops(4*GlobalLength_*NumVectors_);
01334     }
01335       }
01336     else
01337       {
01338   if (ScalarA==1.0)
01339     {
01340       for (int i = 0; i < NumVectors_; i++) {
01341               double * const to = Pointers_[i];
01342               const double * const fromA = A_Pointers[i];
01343               const double * const fromB = B_Pointers[i];
01344 #ifdef Epetra_HAVE_OMP
01345 #pragma omp parallel for default(none) shared(ScalarA,ScalarB,ScalarThis)
01346 #endif
01347         for (int j = 0; j < MyLength; j++) to[j] =  ScalarThis *    to[j] +
01348                                        fromA[j] + 
01349                              ScalarB * fromB[j];
01350             }
01351       UpdateFlops(4*GlobalLength_*NumVectors_);
01352     }
01353   else if (ScalarB==1.0)
01354     {
01355       for (int i = 0; i < NumVectors_; i++) {
01356               double * const to = Pointers_[i];
01357               const double * const fromA = A_Pointers[i];
01358               const double * const fromB = B_Pointers[i];
01359 #ifdef Epetra_HAVE_OMP
01360 #pragma omp parallel for default(none) shared(ScalarA,ScalarThis)
01361 #endif
01362         for (int j = 0; j < MyLength; j++) to[j] =  ScalarThis *    to[j] +
01363                              ScalarA * fromA[j] +
01364                                        fromB[j];
01365             }
01366       UpdateFlops(4*GlobalLength_*NumVectors_);
01367     }
01368   else
01369     {
01370       for (int i = 0; i < NumVectors_; i++) {
01371               double * const to = Pointers_[i];
01372               const double * const fromA = A_Pointers[i];
01373               const double * const fromB = B_Pointers[i];
01374 #ifdef Epetra_HAVE_OMP
01375 #pragma omp parallel for default(none) shared(ScalarA,ScalarB,ScalarThis)
01376 #endif
01377         for (int j = 0; j < MyLength; j++) to[j] =  ScalarThis *    to[j] +
01378                              ScalarA * fromA[j] + 
01379                              ScalarB * fromB[j];
01380             }
01381       UpdateFlops(5*GlobalLength_*NumVectors_);
01382     }
01383       }
01384 
01385 
01386     return(0);
01387   }
01388 
01389 //=========================================================================
01390 int  Epetra_MultiVector::Norm1 (double* Result) const {
01391   
01392   // 1-norm of each vector in MultiVector 
01393     
01394   if (!Map().UniqueGIDs()) {EPETRA_CHK_ERR(-1);}
01395 
01396   const int MyLength = MyLength_;
01397   UpdateDoubleTemp();
01398 #ifdef Epetra_HAVE_OMP
01399   for (int i=0; i < NumVectors_; i++) 
01400     {
01401       const double * const from = Pointers_[i];
01402       double asum = 0.0;
01403 #pragma omp parallel default(none) shared(asum)
01404 {
01405       double localasum = 0.0;
01406 #pragma omp for 
01407       for (int j=0; j< MyLength; j++) localasum += std::abs(from[j]); 
01408 #pragma omp critical
01409       asum += localasum;
01410 }
01411       DoubleTemp_[i] = asum;
01412   }
01413 #else
01414 
01415   for (int i=0; i < NumVectors_; i++) DoubleTemp_[i] = ASUM(MyLength, Pointers_[i]);
01416 #endif
01417   
01418   if (DistributedGlobal())
01419     Comm_->SumAll(DoubleTemp_, Result, NumVectors_);
01420   else
01421     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01422   
01423   UpdateFlops(2*GlobalLength_*NumVectors_);
01424 
01425   return(0);
01426 }
01427 
01428 //=========================================================================
01429 int  Epetra_MultiVector::Norm2 (double* Result) const {
01430   
01431   // 2-norm of each vector in MultiVector 
01432   
01433 
01434   if (!Map().UniqueGIDs()) {EPETRA_CHK_ERR(-1);}
01435 
01436   const int MyLength = MyLength_;
01437   UpdateDoubleTemp();
01438 
01439   for (int i=0; i < NumVectors_; i++) 
01440     {
01441       const double * const from = Pointers_[i];
01442       double sum = 0.0;
01443 #ifdef Epetra_HAVE_OMP
01444 #pragma omp parallel for reduction (+:sum) default(none)
01445 #endif
01446       for (int j=0; j < MyLength; j++) sum += from[j] * from[j];
01447       DoubleTemp_[i] = sum;
01448     }
01449   if (DistributedGlobal())
01450     Comm_->SumAll(DoubleTemp_, Result, NumVectors_);
01451   else
01452     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01453 
01454   for (int i=0; i < NumVectors_; i++) Result[i] = std::sqrt(Result[i]);
01455   
01456   UpdateFlops(2*GlobalLength_*NumVectors_);
01457   
01458   return(0);
01459 }
01460 
01461 //=========================================================================
01462 int  Epetra_MultiVector::NormInf (double* Result) const {
01463   
01464   // Inf-norm of each vector in MultiVector 
01465     
01466 
01467   const int MyLength = MyLength_;
01468   UpdateDoubleTemp();
01469 
01470   for (int i=0; i < NumVectors_; i++) 
01471     {
01472       DoubleTemp_[i] = 0.0;
01473       double normval = 0.0;
01474       const double * const from = Pointers_[i];
01475       if (MyLength>0) normval = from[0];
01476 #ifdef Epetra_HAVE_OMP
01477 #pragma omp parallel default(none) shared(normval)
01478 {
01479       double localnormval = 0.0;
01480 #pragma omp for
01481       for (int j=0; j< MyLength; j++) {
01482          localnormval = EPETRA_MAX(localnormval,std::abs(from[j])); 
01483       }
01484 #pragma omp critical
01485       {
01486       normval = EPETRA_MAX(normval,localnormval);
01487       }
01488 }
01489       DoubleTemp_[i] = normval;
01490 #else
01491       int jj = IAMAX(MyLength, Pointers_[i]);
01492       if (jj>-1) DoubleTemp_[i] = std::abs(Pointers_[i][jj]);
01493 #endif
01494     }
01495   if (DistributedGlobal())
01496     Comm_->MaxAll(DoubleTemp_, Result, NumVectors_);
01497   else
01498     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01499   
01500   // UpdateFlops(0);  Strictly speaking there are not FLOPS in this routine  
01501   return(0);
01502 }
01503 
01504 //=========================================================================
01505 int  Epetra_MultiVector::NormWeighted (const Epetra_MultiVector& Weights, double* Result) const {
01506   
01507   // Weighted 2-norm of each vector in MultiVector 
01508 
01509   // If only one vector in Weights, we assume it will be used as the weights for all vectors
01510 
01511   const int MyLength = MyLength_;
01512   bool OneW = false;
01513   if (Weights.NumVectors()==1) OneW = true;
01514   else if (NumVectors_ != Weights.NumVectors()) EPETRA_CHK_ERR(-1);
01515 
01516   if (MyLength != Weights.MyLength()) EPETRA_CHK_ERR(-2);
01517 
01518 
01519   UpdateDoubleTemp();
01520 
01521   double *W = Weights.Values();
01522   double **W_Pointers = Weights.Pointers();
01523   
01524   for (int i=0; i < NumVectors_; i++) 
01525     {
01526       if (!OneW) W = W_Pointers[i]; // If Weights has the same number of vectors as this, use each weight vector
01527       double sum = 0.0;
01528       const double * const from = Pointers_[i];
01529 #ifdef Epetra_HAVE_OMP
01530 #pragma omp parallel for reduction (+:sum) default(none) shared(W)
01531 #endif
01532       for (int j=0; j < MyLength; j++) {
01533         double tmp = from[j]/W[j];
01534         sum += tmp * tmp;
01535       }
01536       DoubleTemp_[i] = sum;
01537     }
01538   double OneOverN;
01539   if (DistributedGlobal()) {
01540     Comm_->SumAll(DoubleTemp_, Result, NumVectors_);
01541     OneOverN = 1.0 / (double) GlobalLength_;
01542   }
01543   else {
01544     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01545     OneOverN = 1.0 / (double) MyLength;
01546   }
01547   for (int i=0; i < NumVectors_; i++) Result[i] = std::sqrt(Result[i]*OneOverN);
01548   
01549   UpdateFlops(3*GlobalLength_*NumVectors_);
01550   
01551   return(0);
01552   }
01553 
01554 //=========================================================================
01555 int  Epetra_MultiVector::MinValue (double* Result) const {
01556   
01557   // Minimum value of each vector in MultiVector 
01558   
01559   int ierr = 0;
01560 
01561   const int MyLength = MyLength_;
01562   UpdateDoubleTemp();
01563 
01564   for (int i=0; i < NumVectors_; i++) 
01565     {
01566       const double * const from = Pointers_[i];
01567       double MinVal = Epetra_MaxDouble;
01568       if (MyLength>0) MinVal = from[0];
01569 #ifdef Epetra_HAVE_OMP
01570 #pragma omp parallel default(none) shared(MinVal)
01571 {
01572       double localMinVal = MinVal;
01573 #pragma omp for
01574       for (int j=0; j< MyLength; j++) localMinVal = EPETRA_MIN(localMinVal,from[j]); 
01575 #pragma omp critical 
01576       {
01577       MinVal = EPETRA_MIN(MinVal,localMinVal);
01578       }
01579 }
01580       DoubleTemp_[i] = MinVal;
01581 #else
01582       for (int j=0; j< MyLength; j++) MinVal = EPETRA_MIN(MinVal,from[j]); 
01583       DoubleTemp_[i] = MinVal;
01584 #endif
01585     }
01586 
01587   if (MyLength > 0) {
01588     for(int i=0; i<NumVectors_; ++i) {
01589       Result[i] = DoubleTemp_[i];
01590     }
01591   }
01592 
01593   //If MyLength == 0 and Comm_->NumProc() == 1, then Result has
01594   //not been referenced. Also, if vector contents are uninitialized
01595   //then Result contents are not well defined...
01596 
01597   if (Comm_->NumProc() == 1 || !DistributedGlobal()) return(ierr);
01598 
01599   //We're going to use MPI_Allgather to gather every proc's local-
01600   //min values onto every other proc. We'll use the last position
01601   //of the DoubleTemp_ array to indicate whether this proc has
01602   //valid data that should be considered by other procs when forming
01603   //the global-min results.
01604 
01605   if (MyLength == 0) DoubleTemp_[NumVectors_] = 0.0;
01606   else DoubleTemp_[NumVectors_] = 1.0;
01607 
01608   //Now proceed to handle the parallel case. We'll gather local-min
01609   //values from other procs and form a global-min. If any processor
01610   //has MyLength>0, we'll end up with a valid result.
01611 
01612 #ifdef EPETRA_MPI
01613   const Epetra_MpiComm* epetrampicomm =
01614     dynamic_cast<const Epetra_MpiComm*>(Comm_);
01615   if (!epetrampicomm) {
01616     return(-2);
01617   }
01618 
01619   MPI_Comm mpicomm = epetrampicomm->GetMpiComm();
01620   int numProcs = epetrampicomm->NumProc();
01621   double* dwork = new double[numProcs*(NumVectors_+1)];
01622 
01623   MPI_Allgather(DoubleTemp_, NumVectors_+1, MPI_DOUBLE,
01624                 dwork, NumVectors_+1, MPI_DOUBLE, mpicomm);
01625 
01626   //if MyLength==0, then our Result array currently contains
01627   //Epetra_MaxDouble from the local-min calculations above. In this
01628   //case we'll overwrite our Result array with values from the first
01629   //processor that sent valid data.
01630   bool overwrite = MyLength == 0 ? true : false;
01631 
01632   int myPID = epetrampicomm->MyPID();
01633   double* dwork_ptr = dwork;
01634 
01635   for(int j=0; j<numProcs; ++j) {
01636 
01637     //skip data from self, and skip data from
01638     //procs with DoubleTemp_[NumVectors_] == 0.0.
01639     if (j == myPID || dwork_ptr[NumVectors_] < 0.5) {
01640       dwork_ptr += NumVectors_+1;
01641       continue;
01642     }
01643 
01644     for(int i=0; i<NumVectors_; ++i) {
01645       double val = dwork_ptr[i];
01646 
01647       //Set val into our Result array if overwrite is true (see above),
01648       //or if val is less than our current Result[i].
01649       if (overwrite || (Result[i] > val)) Result[i] = val;
01650     }
01651 
01652     //Now set overwrite to false so that we'll do the right thing
01653     //when processing data from subsequent processors.
01654     if (overwrite) overwrite = false;
01655 
01656     dwork_ptr += NumVectors_+1;
01657   }
01658 
01659   delete [] dwork;
01660 #endif
01661 
01662   // UpdateFlops(0);  Strictly speaking there are not FLOPS in this routine
01663   
01664   return(ierr);
01665 }
01666 
01667 //=========================================================================
01668 int  Epetra_MultiVector::MaxValue (double* Result) const {
01669   
01670   // Maximum value of each vector in MultiVector 
01671   
01672   int ierr = 0;
01673 
01674   const int MyLength = MyLength_;
01675   UpdateDoubleTemp();
01676 
01677   for (int i=0; i < NumVectors_; i++) 
01678     {
01679       const double * const from = Pointers_[i];
01680       double MaxVal = -Epetra_MaxDouble;
01681       if (MyLength>0) MaxVal = from[0];
01682 #ifdef Epetra_HAVE_OMP
01683 #pragma omp parallel default(none) shared(MaxVal)
01684 {
01685       double localMaxVal = MaxVal;
01686 #pragma omp for
01687       for (int j=0; j< MyLength; j++) localMaxVal = EPETRA_MAX(localMaxVal,from[j]); 
01688 #pragma omp critical 
01689       {
01690       MaxVal = EPETRA_MAX(MaxVal,localMaxVal);
01691       }
01692 }
01693       DoubleTemp_[i] = MaxVal;
01694 #else
01695       for (int j=0; j< MyLength; j++) MaxVal = EPETRA_MAX(MaxVal,from[j]); 
01696       DoubleTemp_[i] = MaxVal;
01697 #endif
01698     }
01699 
01700   if (MyLength > 0) {
01701     for(int i=0; i<NumVectors_; ++i) {
01702       Result[i] = DoubleTemp_[i];
01703     }
01704   }
01705 
01706   //If MyLength == 0 and Comm_->NumProc() == 1, then Result has
01707   //not been referenced. Also, if vector contents are uninitialized
01708   //then Result contents are not well defined...
01709 
01710   if (Comm_->NumProc() == 1  || !DistributedGlobal()) return(ierr);
01711 
01712   //We're going to use MPI_Allgather to gather every proc's local-
01713   //max values onto every other proc. We'll use the last position
01714   //of the DoubleTemp_ array to indicate whether this proc has
01715   //valid data that should be considered by other procs when forming
01716   //the global-max results.
01717 
01718   if (MyLength == 0) DoubleTemp_[NumVectors_] = 0.0;
01719   else DoubleTemp_[NumVectors_] = 1.0;
01720 
01721   //Now proceed to handle the parallel case. We'll gather local-max
01722   //values from other procs and form a global-max. If any processor
01723   //has MyLength>0, we'll end up with a valid result.
01724 
01725 #ifdef EPETRA_MPI
01726   const Epetra_MpiComm* epetrampicomm =
01727     dynamic_cast<const Epetra_MpiComm*>(Comm_);
01728   if (!epetrampicomm) {
01729     return(-2);
01730   }
01731 
01732   MPI_Comm mpicomm = epetrampicomm->GetMpiComm();
01733   int numProcs = epetrampicomm->NumProc();
01734   double* dwork = new double[numProcs*(NumVectors_+1)];
01735 
01736   MPI_Allgather(DoubleTemp_, NumVectors_+1, MPI_DOUBLE,
01737                 dwork, NumVectors_+1, MPI_DOUBLE, mpicomm);
01738 
01739   //if MyLength==0, then our Result array currently contains
01740   //-Epetra_MaxDouble from the local-max calculations above. In this
01741   //case we'll overwrite our Result array with values from the first
01742   //processor that sent valid data.
01743   bool overwrite = MyLength == 0 ? true : false;
01744 
01745   int myPID = epetrampicomm->MyPID();
01746   double* dwork_ptr = dwork;
01747 
01748   for(int j=0; j<numProcs; ++j) {
01749 
01750     //skip data from self, and skip data from
01751     //procs with DoubleTemp_[NumVectors_] == 0.0.
01752     if (j == myPID || dwork_ptr[NumVectors_] < 0.5) {
01753       dwork_ptr += NumVectors_+1;
01754       continue;
01755     }
01756 
01757     for(int i=0; i<NumVectors_; ++i) {
01758       double val = dwork_ptr[i];
01759 
01760       //Set val into our Result array if overwrite is true (see above),
01761       //or if val is larger than our current Result[i].
01762       if (overwrite || (Result[i] < val)) Result[i] = val; 
01763     }
01764 
01765     //Now set overwrite to false so that we'll do the right thing
01766     //when processing data from subsequent processors.
01767     if (overwrite) overwrite = false;
01768 
01769     dwork_ptr += NumVectors_+1;
01770   }
01771 
01772   delete [] dwork;
01773 #endif
01774 
01775   // UpdateFlops(0);  Strictly speaking there are not FLOPS in this routine
01776   
01777   return(ierr);
01778 }
01779 
01780 //=========================================================================
01781 int  Epetra_MultiVector::MeanValue (double* Result) const {
01782   
01783   // Mean value of each vector in MultiVector 
01784   
01785   const int MyLength = MyLength_;
01786 
01787   if (!Map().UniqueGIDs()) {EPETRA_CHK_ERR(-1);}
01788 
01789   double fGlobalLength = 1.0/EPETRA_MAX((double) GlobalLength_, 1.0);
01790   
01791 
01792   UpdateDoubleTemp();
01793 
01794   for (int i=0; i < NumVectors_; i++) 
01795     {
01796       double sum = 0.0;
01797       const double * const from = Pointers_[i];
01798 #ifdef Epetra_HAVE_OMP
01799 #pragma omp parallel for reduction (+:sum) default(none)
01800 #endif
01801       for (int j=0; j < MyLength; j++) sum += from[j];
01802       DoubleTemp_[i] = sum;
01803     }
01804   if (DistributedGlobal())
01805     Comm_->SumAll(DoubleTemp_, Result, NumVectors_);
01806   else
01807     for (int i=0; i< NumVectors_; ++i) Result[i] = DoubleTemp_[i];
01808 
01809   for (int i=0; i < NumVectors_; i++) Result[i] = Result[i]*fGlobalLength;
01810   
01811   UpdateFlops(GlobalLength_*NumVectors_);
01812 
01813   return(0);
01814 }
01815 
01816   //=========================================================================
01817   int  Epetra_MultiVector::Multiply (char TransA, char TransB, double ScalarAB, 
01818           const Epetra_MultiVector& A, 
01819           const Epetra_MultiVector& B,
01820           double ScalarThis ) {
01821 
01822     // This routine performs a variety of matrix-matrix multiply operations, interpreting
01823     // the Epetra_MultiVector (this-aka C , A and B) as 2D matrices.  Variations are due to
01824     // the fact that A, B and C can be local replicated or global distributed
01825     // Epetra_MultiVectors and that we may or may not operate with the transpose of 
01826     // A and B.  Possible cases are:
01827 
01828     //                                       Num
01829     //     OPERATIONS                        case  Notes
01830     // 1) C(local) = A^X(local) * B^X(local)  4   (X=Trans or Not, No comm needed) 
01831     // 2) C(local) = A^T(distr) * B  (distr)  1   (2D dot product, replicate C)
01832     // 3) C(distr) = A  (distr) * B^X(local)  2   (2D vector update, no comm needed)
01833 
01834     // Note that the following operations are not meaningful for 
01835     // 1D distributions:
01836 
01837     // 1) C(local) = A^T(distr) * B^T(distr)  1
01838     // 2) C(local) = A  (distr) * B^X(distr)  2
01839     // 3) C(distr) = A^X(local) * B^X(local)  4
01840     // 4) C(distr) = A^X(local) * B^X(distr)  4
01841     // 5) C(distr) = A^T(distr) * B^X(local)  2
01842     // 6) C(local) = A^X(distr) * B^X(local)  4
01843     // 7) C(distr) = A^X(distr) * B^X(local)  4
01844     // 8) C(local) = A^X(local) * B^X(distr)  4
01845 
01846     // Total of 32 case (2^5).
01847 
01848     
01849     //if (!ConstantStride_    ||
01852 
01853     // Check for compatible dimensions
01854 
01855     int A_nrows = (TransA=='T') ? A.NumVectors() : A.MyLength();
01856     int A_ncols = (TransA=='T') ? A.MyLength() : A.NumVectors();
01857     int B_nrows = (TransB=='T') ? B.NumVectors() : B.MyLength();
01858     int B_ncols = (TransB=='T') ? B.MyLength() : B.NumVectors();
01859 
01860     double Scalar_local = ScalarThis; // local copy of Scalar
01861   const int MyLength = MyLength_;
01862 
01863     if( MyLength      != A_nrows     ||   // RAB: 2002/01/25: Minor reformat to allow
01864     A_ncols        != B_nrows     ||   //   setting breakpoint at error return.
01865     NumVectors_    != B_ncols  )
01866     EPETRA_CHK_ERR(-2); // Return error
01867 
01868     bool A_is_local = (!A.DistributedGlobal());
01869     bool B_is_local = (!B.DistributedGlobal());
01870     bool C_is_local = (!DistributedGlobal_);
01871     bool Case1 = ( A_is_local &&  B_is_local &&  C_is_local);  // Case 1 above
01872     bool Case2 = (!A_is_local && !B_is_local &&  C_is_local && TransA=='T' );// Case 2
01873     bool Case3 = (!A_is_local &&  B_is_local && !C_is_local && TransA=='N');// Case 3
01874 
01875     if (Case2 && (!A.Map().UniqueGIDs() || !B.Map().UniqueGIDs())) {EPETRA_CHK_ERR(-4);}
01876     if (Case3 && (!A.Map().UniqueGIDs() || !Map().UniqueGIDs())) {EPETRA_CHK_ERR(-5);}
01877   
01878     // Test for meaningful cases
01879 
01880     if (Case1 || Case2 || Case3)
01881       {
01882   if (ScalarThis!=0.0 && Case2)
01883     {
01884       const int MyPID = Comm_->MyPID();
01885       if (MyPID!=0) Scalar_local = 0.0;
01886     }
01887 
01888         // Check if A, B, C have constant stride, if not then make temp copy (strided)
01889 
01890         Epetra_MultiVector * A_tmp, * B_tmp, *C_tmp;
01891         if (!ConstantStride_) C_tmp = new Epetra_MultiVector(*this);
01892         else C_tmp = this;
01893           
01894         if (!A.ConstantStride()) A_tmp = new Epetra_MultiVector(A);
01895         else A_tmp = (Epetra_MultiVector *) &A;
01896     
01897         if (!B.ConstantStride()) B_tmp = new Epetra_MultiVector(B);
01898         else B_tmp = (Epetra_MultiVector *) &B;
01899       
01900     
01901   int m = MyLength;
01902   int n = NumVectors_;
01903   int k = A_ncols;
01904   int lda = EPETRA_MAX(A_tmp->Stride(),1); // The reference BLAS implementation requires lda, ldb, ldc > 0 even if m, n, or k = 0
01905   int ldb = EPETRA_MAX(B_tmp->Stride(),1);
01906   int ldc = EPETRA_MAX(C_tmp->Stride(),1);
01907   double *Ap = A_tmp->Values();
01908   double *Bp = B_tmp->Values();
01909   double *Cp = C_tmp->Values();
01910    
01911   GEMM(TransA, TransB,  m, n, k, ScalarAB,
01912        Ap, lda, Bp, ldb, Scalar_local, Cp, ldc);
01913       
01914   // FLOP Counts
01915   //                                       Num
01916   //     OPERATIONS                        case  Notes 
01917   // 1) C(local) = A^X(local) * B^X(local)  4   (X=Trans or Not, No comm needed)
01918   // 2) C(local) = A^T(distr) * B  (distr)  1   (2D dot product, replicate C)      
01919   // 3) C(distr) = A  (distr) * B^X(local)  2   (2D vector update, no comm needed)
01920 
01921   // For Case 1 we only count the local operations, since we are interested in serial
01922   // cost.  Computation on other processors is redundant.
01923   if (Case1)
01924     {     
01925       UpdateFlops(2*m*n*k);
01926       if (ScalarAB!=1.0) UpdateFlops(m*n);
01927       if (ScalarThis==1.0) UpdateFlops(m*n); else if (ScalarThis!=0.0) UpdateFlops(2*m*n);
01928     }
01929   else if (Case2)
01930     {     
01931       UpdateFlops(2*m*n*A.GlobalLength());
01932       if (ScalarAB!=1.0) UpdateFlops(m*n);
01933       if (ScalarThis==1.0) UpdateFlops(m*n); else if (ScalarThis!=0.0) UpdateFlops(2*m*n);
01934     }
01935   else
01936     {
01937       UpdateFlops(2*GlobalLength_*n*k);
01938       if (ScalarAB!=1.0) UpdateFlops(GlobalLength_*n);
01939       if (ScalarThis==1.0) UpdateFlops(GlobalLength_*n);
01940       else if (ScalarThis!=0.0) UpdateFlops(2*GlobalLength_*n);
01941     }
01942 
01943   // If A or B were not strided, dispose of extra copies.
01944   if (!A.ConstantStride()) delete A_tmp;
01945   if (!B.ConstantStride()) delete B_tmp;
01946 
01947   // If C was not strided, copy from strided version and delete
01948   if (!ConstantStride_) 
01949     {
01950       C_tmp->ExtractCopy(Pointers_);
01951       delete C_tmp;
01952     }
01953 
01954   // If Case 2 then sum up C and distribute it to all processors.
01955 
01956   if (Case2) {EPETRA_CHK_ERR(Reduce());}
01957 
01958   return(0);
01959 
01960       }
01961     else {EPETRA_CHK_ERR(-3)}; // Return error: not a supported operation
01962 
01963   return(0);
01964   }
01965 
01966 
01967 //=========================================================================
01968 int Epetra_MultiVector::Multiply(double ScalarAB, const Epetra_MultiVector& A, const Epetra_MultiVector& B,
01969            double ScalarThis) {
01970   
01971   
01972   // Hadamard product of two MultiVectors: 
01973   // this = ScalarThis * this + ScalarAB * A * B (element-wise)
01974   
01975   if (ScalarAB==0.0) {
01976     EPETRA_CHK_ERR(Scale(ScalarThis));
01977     return(0);
01978   }
01979   const int MyLength = MyLength_;
01980          
01981   if (A.NumVectors() != 1 && A.NumVectors() != B.NumVectors()) EPETRA_CHK_ERR(-1); // A must have one column or be the same as B.
01982   if (NumVectors_ != B.NumVectors()) EPETRA_CHK_ERR(-2);
01983   if (MyLength != A.MyLength() || MyLength != B.MyLength()) EPETRA_CHK_ERR(-3);
01984   
01985   double **A_Pointers = (double**)A.Pointers();
01986   double **B_Pointers = (double**)B.Pointers();
01987 
01988   int IncA = 1;
01989   if (A.NumVectors() == 1 ) IncA = 0;
01990 
01991     if (ScalarThis==0.0) {
01992       if (ScalarAB==1.0)
01993   {
01994     for (int i = 0; i < NumVectors_; i++) {
01995       const double * const Aptr = A_Pointers[i*IncA];
01996             const double * const Bptr = B_Pointers[i];
01997             double * const to = Pointers_[i];
01998 #ifdef Epetra_HAVE_OMP
01999 #pragma omp parallel for default(none)
02000 #endif
02001       for (int j = 0; j < MyLength; j++) {
02002               to[j] =  Aptr[j] * Bptr[j];
02003             }
02004     }
02005     UpdateFlops(GlobalLength_*NumVectors_);
02006   }
02007       else
02008   {
02009     for (int i = 0; i < NumVectors_; i++) {
02010       const double * const Aptr = A_Pointers[i*IncA];
02011             const double * const Bptr = B_Pointers[i];
02012             double * const to = Pointers_[i];
02013 #ifdef Epetra_HAVE_OMP
02014 #pragma omp parallel for default(none) shared(ScalarAB)
02015 #endif
02016       for (int j = 0; j < MyLength; j++) {
02017               to[j] = ScalarAB * Aptr[j] * Bptr[j];
02018             }
02019     }
02020     UpdateFlops(2*GlobalLength_*NumVectors_);
02021   }
02022     }
02023     else if (ScalarThis==1.0) {
02024       if (ScalarAB==1.0)
02025   {
02026     for (int i = 0; i < NumVectors_; i++) {
02027       const double * const Aptr = A_Pointers[i*IncA];
02028             const double * const Bptr = B_Pointers[i];
02029             double * const to = Pointers_[i];
02030 #ifdef Epetra_HAVE_OMP
02031 #pragma omp parallel for default(none)
02032 #endif
02033       for (int j = 0; j < MyLength; j++) {
02034               to[j] +=  Aptr[j] * Bptr[j];
02035             }
02036     }
02037     UpdateFlops(2*GlobalLength_*NumVectors_);
02038   }
02039       else {
02040     for (int i = 0; i < NumVectors_; i++) {
02041       const double * const Aptr = A_Pointers[i*IncA];
02042             const double * const Bptr = B_Pointers[i];
02043             double * const to = Pointers_[i];
02044 #ifdef Epetra_HAVE_OMP
02045 #pragma omp parallel for default(none) shared(ScalarAB)
02046 #endif
02047       for (int j = 0; j < MyLength; j++) {
02048               to[j] += ScalarAB * Aptr[j] * Bptr[j];
02049             }
02050           }
02051           UpdateFlops(3*GlobalLength_*NumVectors_);
02052       }
02053     }
02054     else { // if (ScalarThis!=1.0 && ScalarThis !=0 ) 
02055       if (ScalarAB==1.0)
02056   {
02057     for (int i = 0; i < NumVectors_; i++) {
02058       const double * const Aptr = A_Pointers[i*IncA];
02059             const double * const Bptr = B_Pointers[i];
02060             double * const to = Pointers_[i];
02061 #ifdef Epetra_HAVE_OMP
02062 #pragma omp parallel for default(none) shared(ScalarThis)
02063 #endif
02064       for (int j = 0; j < MyLength; j++) {
02065               to[j] =  ScalarThis * to[j] +
02066                   Aptr[j] * Bptr[j];
02067             }
02068     }
02069     UpdateFlops(3*GlobalLength_*NumVectors_);
02070   }
02071       else
02072   {
02073     for (int i = 0; i < NumVectors_; i++) {
02074       const double * const Aptr = A_Pointers[i*IncA];
02075             const double * const Bptr = B_Pointers[i];
02076             double * const to = Pointers_[i];
02077 #ifdef Epetra_HAVE_OMP
02078 #pragma omp parallel for default(none) shared(ScalarThis,ScalarAB)
02079 #endif
02080       for (int j = 0; j < MyLength; j++) {
02081               to[j] = ScalarThis * to[j] +
02082                      ScalarAB * Aptr[j] * Bptr[j];
02083             }
02084     }
02085     UpdateFlops(4*GlobalLength_*NumVectors_);
02086   }
02087     }
02088   return(0);
02089 }
02090 //=========================================================================
02091 int Epetra_MultiVector::ReciprocalMultiply(double ScalarAB, const Epetra_MultiVector& A, const Epetra_MultiVector& B,
02092            double ScalarThis) {
02093   
02094   
02095   // Hadamard product of two MultiVectors: 
02096   // this = ScalarThis * this + ScalarAB * B / A (element-wise)
02097   
02098   if (ScalarAB==0.0) {
02099     EPETRA_CHK_ERR(Scale(ScalarThis));
02100     return(0);
02101   }
02102   const int MyLength = MyLength_;
02103          
02104   if (A.NumVectors() != 1 && A.NumVectors() != B.NumVectors()) EPETRA_CHK_ERR(-1); // A must have one column or be the same as B.
02105   if (NumVectors_ != B.NumVectors()) EPETRA_CHK_ERR(-2);
02106   if (MyLength != A.MyLength() || MyLength != B.MyLength()) EPETRA_CHK_ERR(-3);
02107   
02108   double **A_Pointers = (double**)A.Pointers();
02109   double **B_Pointers = (double**)B.Pointers();
02110 
02111   int IncA = 1;
02112   if (A.NumVectors() == 1 ) IncA = 0;
02113 
02114     if (ScalarThis==0.0) {
02115       if (ScalarAB==1.0)
02116   {
02117     for (int i = 0; i < NumVectors_; i++) {
02118       const double * const Aptr = A_Pointers[i*IncA];
02119             const double * const Bptr = B_Pointers[i];
02120             double * const to = Pointers_[i];
02121 #ifdef Epetra_HAVE_OMP
02122 #pragma omp parallel for default(none)
02123 #endif
02124       for (int j = 0; j < MyLength; j++) {
02125               to[j] = Bptr[j] / Aptr[j];
02126             }
02127     }
02128     UpdateFlops(GlobalLength_*NumVectors_);
02129   }
02130       else
02131   {
02132     for (int i = 0; i < NumVectors_; i++) {
02133       const double * const Aptr = A_Pointers[i*IncA];
02134             const double * const Bptr = B_Pointers[i];
02135             double * const to = Pointers_[i];
02136 #ifdef Epetra_HAVE_OMP
02137 #pragma omp parallel for default(none) shared(ScalarAB)
02138 #endif
02139       for (int j = 0; j < MyLength; j++) {
02140               to[j] = ScalarAB * Bptr[j] / Aptr[j];
02141             }
02142     }
02143     UpdateFlops(2*GlobalLength_*NumVectors_);
02144   }
02145     }
02146     else if (ScalarThis==1.0) {
02147       if (ScalarAB==1.0)
02148   {
02149     for (int i = 0; i < NumVectors_; i++) {
02150       const double * const Aptr = A_Pointers[i*IncA];
02151             const double * const Bptr = B_Pointers[i];
02152             double * const to = Pointers_[i];
02153 #ifdef Epetra_HAVE_OMP
02154 #pragma omp parallel for default(none)
02155 #endif
02156       for (int j = 0; j < MyLength; j++) {
02157               to[j] +=  Bptr[j] / Aptr[j];
02158             }
02159     }
02160     UpdateFlops(2*GlobalLength_*NumVectors_);
02161   }
02162       else
02163   {
02164     for (int i = 0; i < NumVectors_; i++) {
02165       const double * const Aptr = A_Pointers[i*IncA];
02166             const double * const Bptr = B_Pointers[i];
02167             double * const to = Pointers_[i];
02168 #ifdef Epetra_HAVE_OMP
02169 #pragma omp parallel for default(none) shared(ScalarAB)
02170 #endif
02171       for (int j = 0; j < MyLength; j++) {
02172               to[j] += ScalarAB * Bptr[j] / Aptr[j];
02173             }
02174     }
02175     UpdateFlops(3*GlobalLength_*NumVectors_);
02176         }
02177     }
02178     else { // if (ScalarThis!=1.0 && ScalarThis !=0 ) 
02179       if (ScalarAB==1.0)
02180   {
02181     for (int i = 0; i < NumVectors_; i++) {
02182       const double * const Aptr = A_Pointers[i*IncA];
02183             const double * const Bptr = B_Pointers[i];
02184             double * const to = Pointers_[i];
02185 #ifdef Epetra_HAVE_OMP
02186 #pragma omp parallel for default(none) shared(ScalarThis)
02187 #endif
02188       for (int j = 0; j < MyLength; j++) {
02189               to[j] =  ScalarThis * to[j] +
02190                    Bptr[j] / Aptr[j];
02191             }
02192     }
02193     UpdateFlops(3*GlobalLength_*NumVectors_);
02194   }
02195       else {
02196     for (int i = 0; i < NumVectors_; i++) {
02197       const double * const Aptr = A_Pointers[i*IncA];
02198             const double * const Bptr = B_Pointers[i];
02199             double * const to = Pointers_[i];
02200 #ifdef Epetra_HAVE_OMP
02201 #pragma omp parallel for default(none) shared(ScalarAB,ScalarThis)
02202 #endif
02203       for (int j = 0; j < MyLength; j++) {
02204               to[j] = ScalarThis * to[j] + ScalarAB * 
02205                 Bptr[j] / Aptr[j];
02206             }
02207     }
02208           UpdateFlops(4*GlobalLength_*NumVectors_);
02209       }
02210     }
02211   return(0);
02212 }
02213 
02214 
02215 //=======================================================================
02216 Epetra_Vector *& Epetra_MultiVector::operator () (int index)  {
02217   
02218   //  Epetra_MultiVector::operator () --- return non-const reference 
02219   
02220   if (index < 0 || index >=NumVectors_) 
02221     throw ReportError("Vector index = " + toString(index) + "is out of range. Number of Vectors = " + toString(NumVectors_), -1);
02222 
02223   UpdateVectors();
02224 
02225   // Create a new Epetra_Vector that is a view of ith vector, if not already present
02226   if (Vectors_[index]==0)
02227     Vectors_[index] = new Epetra_Vector(View, Map(), Pointers_[index]);
02228   return(Vectors_[index]);
02229 }
02230 
02231 //=======================================================================
02232 const Epetra_Vector *& Epetra_MultiVector::operator () (int index) const {
02233   
02234   //  Epetra_MultiVector::operator () --- return non-const reference 
02235 
02236   if (index < 0 || index >=NumVectors_) 
02237     throw ReportError("Vector index = " + toString(index) + "is out of range. Number of Vectors = " + toString(NumVectors_), -1);
02238 
02239   UpdateVectors();
02240 
02241   if (Vectors_[index]==0)
02242     Vectors_[index] = new Epetra_Vector(View, Map(), Pointers_[index]);
02243 
02244   const Epetra_Vector * & temp = (const Epetra_Vector * &) (Vectors_[index]);
02245   return(temp);
02246 }
02247 
02248 //========================================================================
02249 Epetra_MultiVector& Epetra_MultiVector::operator = (const Epetra_MultiVector& Source) {
02250   
02251   // Check for special case of this=Source
02252   if (this != &Source) Assign(Source);
02253   
02254   return(*this);
02255 }
02256 
02257 //=========================================================================
02258 void Epetra_MultiVector::Assign(const Epetra_MultiVector& A) {
02259   
02260   const int MyLength = MyLength_;
02261   if (NumVectors_ != A.NumVectors())
02262     throw ReportError("Number of vectors incompatible in Assign.  The this MultiVector has NumVectors = " + toString(NumVectors_)
02263           + ".  The A MultiVector has NumVectors = " + toString(A.NumVectors()), -3);
02264   if (MyLength != A.MyLength())
02265     throw ReportError("Length of MultiVectors incompatible in Assign.  The this MultiVector has MyLength = " + toString(MyLength)
02266           + ".  The A MultiVector has MyLength = " + toString(A.MyLength()), -4);
02267   
02268   double ** A_Pointers = A.Pointers();
02269   for (int i = 0; i< NumVectors_; i++) {
02270       double * const to = Pointers_[i];
02271       const double * const from = A_Pointers[i];
02272 #ifdef Epetra_HAVE_OMP
02273 #pragma omp parallel for default(none)
02274 #endif
02275       for (int j=0; j<MyLength; j++) to[j] = from[j];
02276     }
02277     return;    
02278   }
02279 
02280   //=========================================================================
02281   int  Epetra_MultiVector::Reduce() {
02282 
02283     // Global reduction on each entry of a Replicated Local MultiVector
02284   const int MyLength = MyLength_;
02285     double * source = 0;
02286       if (MyLength>0) source = new double[MyLength*NumVectors_];
02287     double * target = 0;
02288     bool packed = (ConstantStride_ && (Stride_==MyLength));
02289     if (packed) {
02290        for (int i=0; i<MyLength*NumVectors_; i++) source[i] = Values_[i];
02291        target = Values_;
02292     }
02293     else {
02294        double * tmp1 = source;
02295        for (int i = 0; i < NumVectors_; i++) {
02296          double * tmp2 = Pointers_[i];
02297          for (int j=0; j< MyLength; j++) *tmp1++ = *tmp2++;
02298        }
02299        if (MyLength>0) target = new double[MyLength*NumVectors_];
02300     }
02301 
02302     Comm_->SumAll(source, target, MyLength*NumVectors_);
02303     if (MyLength>0) delete [] source;
02304     if (!packed) {
02305        double * tmp2 = target;
02306        for (int i = 0; i < NumVectors_; i++) {
02307          double * tmp1 = Pointers_[i];
02308          for (int j=0; j< MyLength; j++) *tmp1++ = *tmp2++;
02309        }
02310        if (MyLength>0) delete [] target;
02311     }
02312     // UpdateFlops(0);  No serial Flops in this function
02313     return(0);
02314   }
02315 //=======================================================================
02316 int Epetra_MultiVector::ResetView(double ** ArrayOfPointers) {
02317   
02318   if (!UserAllocated_) {
02319     EPETRA_CHK_ERR(-1); // Can't reset view if multivector was not allocated as a view
02320   }
02321 
02322   for (int i = 0; i< NumVectors_; i++) Pointers_[i] = ArrayOfPointers[i];
02323   DoView();
02324   
02325   return(0);
02326   }
02327 //=======================================================================
02328 void Epetra_MultiVector::Print(ostream& os) const {
02329   int MyPID = Map().Comm().MyPID();
02330   int NumProc = Map().Comm().NumProc();
02331   
02332   for (int iproc=0; iproc < NumProc; iproc++) {
02333     if (MyPID==iproc) {
02334       int NumVectors1 = NumVectors();
02335       int NumMyElements1 =Map(). NumMyElements();
02336       int MaxElementSize1 = Map().MaxElementSize();
02337       int * MyGlobalElements1 = Map().MyGlobalElements();
02338       int * FirstPointInElementList1 = NULL;
02339       if (MaxElementSize1!=1) FirstPointInElementList1 = Map().FirstPointInElementList();
02340       double ** A_Pointers = Pointers();
02341 
02342       if (MyPID==0) {
02343   os.width(8);
02344   os <<  "     MyPID"; os << "    ";
02345   os.width(12);
02346   if (MaxElementSize1==1)
02347     os <<  "GID  ";
02348   else
02349     os <<  "     GID/Point";
02350   for (int j = 0; j < NumVectors1 ; j++)
02351     {   
02352       os.width(20);
02353       os <<  "Value  ";
02354     }
02355   os << endl;
02356       }
02357       for (int i=0; i < NumMyElements1; i++) {
02358   for (int ii=0; ii< Map().ElementSize(i); ii++) {
02359        int iii;
02360     os.width(10);
02361     os <<  MyPID; os << "    ";
02362     os.width(10);
02363     if (MaxElementSize1==1) {
02364       os << MyGlobalElements1[i] << "    ";
02365        iii = i;
02366        }
02367     else {
02368       os <<  MyGlobalElements1[i]<< "/" << ii << "    ";
02369          iii = FirstPointInElementList1[i]+ii;
02370        }
02371     for (int j = 0; j < NumVectors1 ; j++)
02372       {   
02373         os.width(20);
02374         os <<  A_Pointers[j][iii];
02375       }
02376     os << endl;
02377   }
02378       }
02379       os << flush; 
02380     }
02381 
02382     // Do a few global ops to give I/O a chance to complete
02383     Map().Comm().Barrier();
02384     Map().Comm().Barrier();
02385     Map().Comm().Barrier();
02386   }
02387   return;
02388 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines