Epetra Package Browser (Single Doxygen Collection) Development
test/BasicPerfTest_LL/cxx_main.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Epetra: Linear Algebra Services Package 
00005 //                 Copyright 2011 Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 
00043 #define EPETRA_HAVE_JADMATRIX
00044 #define EPETRA_VERY_SHORT_PERFTEST
00045 #define EPETRA_HAVE_STATICPROFILE
00046 #include "Epetra_Map.h"
00047 #include "Epetra_LocalMap.h"
00048 #include "Epetra_BlockMap.h"
00049 #include "Epetra_Time.h"
00050 #include "Epetra_CrsMatrix.h"
00051 #include "Epetra_VbrMatrix.h"
00052 #include "Epetra_Vector.h"
00053 #include "Epetra_IntVector.h"
00054 #include "Epetra_MultiVector.h"
00055 #include "Epetra_IntSerialDenseVector.h"
00056 #include "Epetra_SerialDenseVector.h"
00057 #include "Epetra_Flops.h"
00058 #ifdef EPETRA_MPI
00059 #include "Epetra_MpiComm.h"
00060 #include "mpi.h"
00061 #else
00062 #include "Epetra_SerialComm.h"
00063 #endif
00064 #include "../epetra_test_err.h"
00065 #include "Epetra_Version.h"
00066 #ifdef EPETRA_HAVE_JADMATRIX
00067 #include "Epetra_JadMatrix.h"
00068 #endif
00069 
00070 // prototypes
00071 
00072 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00073       int * xoff, int * yoff,
00074       const Epetra_Comm  &comm, bool verbose, bool summary, 
00075       Epetra_Map *& map, 
00076       Epetra_CrsMatrix *& A, 
00077       Epetra_Vector *& b, 
00078       Epetra_Vector *& bt,
00079       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00080 
00081 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00082       int * xoff, int * yoff, int nrhs,
00083       const Epetra_Comm  &comm, bool verbose, bool summary, 
00084       Epetra_Map *& map, 
00085       Epetra_CrsMatrix *& A, 
00086       Epetra_MultiVector *& b, 
00087       Epetra_MultiVector *& bt,
00088       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00089  
00090 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00091       int * xoff, int * yoff,
00092       int nsizes, int * sizes,
00093       const Epetra_Comm  &comm, bool verbose, bool summary, 
00094       Epetra_BlockMap *& map, 
00095       Epetra_VbrMatrix *& A, 
00096       Epetra_Vector *& b, 
00097       Epetra_Vector *& bt,
00098       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00099 
00100 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00101       int * xoff, int * yoff, 
00102       int nsizes, int * sizes, int nrhs,
00103       const Epetra_Comm  &comm, bool verbose, bool summary, 
00104       Epetra_BlockMap *& map, 
00105       Epetra_VbrMatrix *& A, 
00106       Epetra_MultiVector *& b, 
00107       Epetra_MultiVector *& bt,
00108       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00109 
00110 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00111             int myPID, long long * & myGlobalElements);
00112 
00113 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00114         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00115 #ifdef EPETRA_HAVE_JADMATRIX
00116 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00117         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00118 #endif
00119 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00120           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00121           bool StaticProfile, bool verbose, bool summary);
00122 int main(int argc, char *argv[])
00123 {
00124   int ierr = 0;
00125   double elapsed_time;
00126   double total_flops;
00127   double MFLOPs;
00128     
00129 
00130 #ifdef EPETRA_MPI
00131 
00132   // Initialize MPI
00133   MPI_Init(&argc,&argv);
00134   Epetra_MpiComm comm( MPI_COMM_WORLD );
00135 #else
00136   Epetra_SerialComm comm;
00137 #endif
00138 
00139   bool verbose = false;
00140   bool summary = false;
00141 
00142   // Check if we should print verbose results to standard out
00143   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
00144 
00145   // Check if we should print verbose results to standard out
00146   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
00147 
00148   if(argc < 6) {
00149     cerr << "Usage: " << argv[0]
00150          << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
00151          << "where:" << endl
00152          << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
00153          << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
00154          << "NumProcX          - Number of processors to use in X direction" << endl
00155          << "NumProcY          - Number of processors to use in Y direction" << endl
00156          << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
00157          << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
00158          << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
00159    << " Serial example:" << endl
00160          << argv[0] << " 16 12 1 1 25 -v" << endl
00161    << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
00162    << " MPI example:" << endl
00163          << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
00164    << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
00165    << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
00166          << endl;
00167     return(1);
00168 
00169   }
00170     //char tmp;
00171     //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
00172     //if (comm.MyPID()==0) cin >> tmp;
00173     //comm.Barrier();
00174 
00175   comm.SetTracebackMode(0); // This should shut down any error traceback reporting
00176   if (verbose && comm.MyPID()==0)
00177     cout << Epetra_Version() << endl << endl;
00178   if (summary && comm.MyPID()==0) {
00179     if (comm.NumProc()==1)
00180       cout << Epetra_Version() << endl << endl;
00181     else
00182       cout << endl << endl; // Print two blank line to keep output columns lined up
00183   }
00184 
00185   if (verbose) cout << comm <<endl;
00186 
00187 
00188   // Redefine verbose to only print on PE 0
00189 
00190   if (verbose && comm.MyPID()!=0) verbose = false;
00191   if (summary && comm.MyPID()!=0) summary = false;
00192 
00193   int numNodesX = atoi(argv[1]);
00194   int numNodesY = atoi(argv[2]);
00195   int numProcsX = atoi(argv[3]);
00196   int numProcsY = atoi(argv[4]);
00197   int numPoints = atoi(argv[5]);
00198 
00199   if (verbose || (summary && comm.NumProc()==1)) {
00200     cout << " Number of local nodes in X direction  = " << numNodesX << endl
00201    << " Number of local nodes in Y direction  = " << numNodesY << endl
00202    << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
00203    << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
00204    << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
00205    << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
00206    << " Number of Processors in X direction   = " << numProcsX << endl
00207    << " Number of Processors in Y direction   = " << numProcsY << endl
00208    << " Number of Points in stencil           = " << numPoints << endl << endl;
00209   }
00210   // Print blank line to keep output columns lined up
00211   if (summary && comm.NumProc()>1)
00212     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
00213 
00214   if (numProcsX*numProcsY!=comm.NumProc()) {
00215     cerr << "Number of processors = " << comm.NumProc() << endl
00216    << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
00217     return(1);
00218   }
00219 
00220   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
00221     cerr << "Number of points specified = " << numPoints << endl
00222    << " is not 5, 9, 25" << endl << endl;
00223     return(1);
00224   }
00225 
00226   if (numNodesX*numNodesY<=0) {
00227     cerr << "Product of number of nodes is <= zero" << endl << endl;
00228     return(1);
00229   }
00230 
00231   Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
00232   Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
00233   if (numPoints==5) {
00234 
00235      // Generate a 5-point 2D Finite Difference matrix
00236     Xoff.Size(5);
00237     Yoff.Size(5);
00238     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0; 
00239     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; 
00240 
00241      // Generate a 2-point 2D Lower triangular Finite Difference matrix
00242     XLoff.Size(2);
00243     YLoff.Size(2);
00244     XLoff[0] = -1; XLoff[1] =  0; 
00245     YLoff[0] =  0; YLoff[1] = -1;
00246 
00247      // Generate a 3-point 2D upper triangular Finite Difference matrix
00248     XUoff.Size(3);
00249     YUoff.Size(3);
00250     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0; 
00251     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
00252   }
00253   else if (numPoints==9) {
00254     // Generate a 9-point 2D Finite Difference matrix
00255     Xoff.Size(9);
00256     Yoff.Size(9);
00257     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1; 
00258     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1; 
00259     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1; 
00260     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0; 
00261     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1; 
00262     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1; 
00263 
00264     // Generate a 5-point lower triangular 2D Finite Difference matrix
00265     XLoff.Size(5);
00266     YLoff.Size(5);
00267     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1; 
00268     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1; 
00269     XLoff[3] = -1;  XLoff[4] =  0; 
00270     YLoff[3] =  0;  YLoff[4] =  0;
00271 
00272     // Generate a 4-point upper triangular 2D Finite Difference matrix
00273     XUoff.Size(4);
00274     YUoff.Size(4);
00275     XUoff[0] =  1; 
00276     YUoff[0] =  0; 
00277     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1; 
00278     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1; 
00279 
00280   }
00281   else {
00282     // Generate a 25-point 2D Finite Difference matrix
00283     Xoff.Size(25);
00284     Yoff.Size(25);
00285     int xi = 0, yi = 0;
00286     int xo = -2, yo = -2;
00287     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00288     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00289     xo = -2, yo++;
00290     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00291     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00292     xo = -2, yo++;
00293     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00294     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00295     xo = -2, yo++;
00296     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00297     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00298     xo = -2, yo++;
00299     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00300     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00301 
00302     // Generate a 13-point lower triangular 2D Finite Difference matrix
00303     XLoff.Size(13);
00304     YLoff.Size(13);
00305     xi = 0, yi = 0;
00306     xo = -2, yo = -2;
00307     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00308     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00309     xo = -2, yo++;
00310     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00311     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00312     xo = -2, yo++;
00313     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00314     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00315 
00316     // Generate a 13-point upper triangular 2D Finite Difference matrix
00317     XUoff.Size(13);
00318     YUoff.Size(13);
00319     xi = 0, yi = 0;
00320     xo = 0, yo = 0;
00321     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00322     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00323     xo = -2, yo++;
00324     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00325     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00326     xo = -2, yo++;
00327     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00328     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00329 
00330   }
00331 
00332   Epetra_Map * map;
00333   Epetra_Map * mapL;
00334   Epetra_Map * mapU;
00335   Epetra_CrsMatrix * A;
00336   Epetra_CrsMatrix * L;
00337   Epetra_CrsMatrix * U;
00338   Epetra_MultiVector * b;
00339   Epetra_MultiVector * bt;
00340   Epetra_MultiVector * xexact;
00341   Epetra_MultiVector * bL;
00342   Epetra_MultiVector * btL;
00343   Epetra_MultiVector * xexactL;
00344   Epetra_MultiVector * bU;
00345   Epetra_MultiVector * btU;
00346   Epetra_MultiVector * xexactU;
00347   Epetra_SerialDenseVector resvec(0);
00348 
00349   //Timings
00350   Epetra_Flops flopcounter;
00351   Epetra_Time timer(comm);
00352 
00353 #ifdef EPETRA_VERY_SHORT_PERFTEST
00354   int jstop = 1;
00355 #elif EPETRA_SHORT_PERFTEST
00356   int jstop = 1;
00357 #else
00358   int jstop = 2;
00359 #endif
00360   for (int j=0; j<jstop; j++) {
00361     for (int k=1; k<17; k++) {
00362 #ifdef EPETRA_VERY_SHORT_PERFTEST
00363       if (k<3 || (k%4==0 && k<9)) {
00364 #elif EPETRA_SHORT_PERFTEST
00365       if (k<6 || k%4==0) {
00366 #else
00367       if (k<7 || k%2==0) {
00368 #endif
00369       int nrhs=k;
00370       if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
00371 
00372       bool StaticProfile = (j!=0);
00373       if (verbose) {
00374         if (StaticProfile) cout << " static profile\n";
00375         else cout << " dynamic profile\n";
00376       }
00377       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00378        Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
00379        map, A, b, bt, xexact, StaticProfile, false);
00380 
00381       
00382 #ifdef EPETRA_HAVE_JADMATRIX
00383       
00384       timer.ResetStartTime();
00385       Epetra_JadMatrix JA(*A);
00386       elapsed_time = timer.ElapsedTime();
00387       if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
00388 
00389       //cout << "A = " << *A << endl;
00390       //cout << "JA = " << JA << endl;
00391 
00392       runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
00393 
00394 #endif
00395       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
00396 
00397       delete A;
00398       delete b;
00399       delete bt; 
00400       delete xexact;
00401 
00402       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
00403        XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
00404        mapL, L, bL, btL, xexactL, StaticProfile, true);
00405       
00406 
00407       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
00408        XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
00409        mapU, U, bU, btU, xexactU, StaticProfile, true);
00410       
00411 
00412       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
00413 
00414       delete L;
00415       delete bL;
00416       delete btL; 
00417       delete xexactL;
00418       delete mapL;
00419 
00420       delete U;
00421       delete bU;
00422       delete btU; 
00423       delete xexactU;
00424       delete mapU;
00425 
00426       Epetra_MultiVector q(*map, nrhs);
00427       Epetra_MultiVector z(q);
00428       Epetra_MultiVector r(q);
00429       
00430       delete map;
00431       q.SetFlopCounter(flopcounter);
00432       z.SetFlopCounter(q);
00433       r.SetFlopCounter(q);
00434 
00435       resvec.Resize(nrhs);
00436       
00437     
00438       flopcounter.ResetFlops();
00439       timer.ResetStartTime();
00440 
00441       //10 norms
00442       for( int i = 0; i < 10; ++i )
00443   q.Norm2( resvec.Values() );
00444 
00445       elapsed_time = timer.ElapsedTime();
00446       total_flops = q.Flops();
00447       MFLOPs = total_flops/elapsed_time/1000000.0;
00448       if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
00449       
00450       if (summary) {
00451   if (comm.NumProc()==1) cout << "Norm2" << '\t';
00452   cout << MFLOPs << endl;
00453       }
00454       
00455       flopcounter.ResetFlops();
00456       timer.ResetStartTime();
00457       
00458       //10 dot's
00459       for( int i = 0; i < 10; ++i )
00460   q.Dot(z, resvec.Values());
00461       
00462       elapsed_time = timer.ElapsedTime();
00463       total_flops = q.Flops();
00464       MFLOPs = total_flops/elapsed_time/1000000.0;
00465       if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
00466       
00467       if (summary) {
00468   if (comm.NumProc()==1) cout << "DotProd" << '\t';
00469   cout << MFLOPs << endl;
00470       }
00471       
00472       flopcounter.ResetFlops();
00473       timer.ResetStartTime();
00474       
00475       //10 dot's
00476       for( int i = 0; i < 10; ++i )
00477   q.Update(1.0, z, 1.0, r, 0.0);
00478       
00479       elapsed_time = timer.ElapsedTime();
00480       total_flops = q.Flops();
00481       MFLOPs = total_flops/elapsed_time/1000000.0;
00482       if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
00483       
00484       if (summary) {
00485   if (comm.NumProc()==1) cout << "Update" << '\t';
00486   cout << MFLOPs << endl;
00487       }
00488     }
00489     }
00490   }
00491 #ifdef EPETRA_MPI
00492   MPI_Finalize() ;
00493 #endif
00494 
00495 return ierr ;
00496 }
00497 
00498 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00499 // 
00500 // nx      (In) - number of grid points in x direction
00501 // ny      (In) - number of grid points in y direction
00502 //   The total number of equations will be nx*ny ordered such that the x direction changes
00503 //   most rapidly: 
00504 //      First equation is at point (0,0)
00505 //      Second at                  (1,0)
00506 //       ...
00507 //      nx equation at             (nx-1,0)
00508 //      nx+1st equation at         (0,1)
00509 
00510 // numPoints (In) - number of points in finite difference stencil
00511 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00512 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00513 //   A standard 5-point finite difference stencil would be described as:
00514 //     numPoints = 5
00515 //     xoff = [-1, 1, 0,  0, 0]
00516 //     yoff = [ 0, 0, 0, -1, 1]
00517 
00518 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00519 
00520 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00521 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00522 // A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
00523 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00524 //                diagonal will be slightly diag dominant.
00525 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00526 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00527 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00528 
00529 // Note: Caller of this function is responsible for deleting all output objects.
00530 
00531 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00532       int * xoff, int * yoff,
00533       const Epetra_Comm  &comm, bool verbose, bool summary, 
00534       Epetra_Map *& map, 
00535       Epetra_CrsMatrix *& A, 
00536       Epetra_Vector *& b, 
00537       Epetra_Vector *& bt,
00538       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00539 
00540   Epetra_MultiVector * b1, * bt1, * xexact1;
00541   
00542   GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, 
00543          xoff, yoff, 1, comm, verbose, summary, 
00544          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00545 
00546   b = dynamic_cast<Epetra_Vector *>(b1);
00547   bt = dynamic_cast<Epetra_Vector *>(bt1);
00548   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00549 
00550   return;
00551 }
00552 
00553 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00554       int * xoff, int * yoff, int nrhs,
00555       const Epetra_Comm  &comm, bool verbose, bool summary,
00556       Epetra_Map *& map, 
00557       Epetra_CrsMatrix *& A, 
00558       Epetra_MultiVector *& b, 
00559       Epetra_MultiVector *& bt,
00560       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00561   
00562   Epetra_Time timer(comm);
00563   // Determine my global IDs
00564   long long * myGlobalElements;
00565   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00566 
00567   int numMyEquations = numNodesX*numNodesY;
00568   
00569   map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00570   delete [] myGlobalElements;
00571 
00572   long long numGlobalEquations = map->NumGlobalElements64();
00573 
00574   int profile = 0; if (StaticProfile) profile = numPoints;
00575 
00576 #ifdef EPETRA_HAVE_STATICPROFILE
00577 
00578   if (MakeLocalOnly) 
00579     A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
00580   else
00581     A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
00582 
00583 #else
00584 
00585   if (MakeLocalOnly) 
00586     A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
00587   else
00588     A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
00589 
00590 #endif
00591 
00592   long long * indices = new long long[numPoints];
00593   double * values = new double[numPoints];
00594 
00595   double dnumPoints = (double) numPoints;
00596   int nx = numNodesX*numProcsX;
00597 
00598   for (int i=0; i<numMyEquations; i++) {
00599 
00600     long long rowID = map->GID64(i);
00601     int numIndices = 0;
00602 
00603     for (int j=0; j<numPoints; j++) {
00604       long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00605       if (colID>-1 && colID<numGlobalEquations) {
00606   indices[numIndices] = colID;
00607   double value = - ((double) rand())/ ((double) RAND_MAX);
00608   if (colID==rowID)
00609     values[numIndices++] = dnumPoints - value; // Make diagonal dominant
00610   else
00611     values[numIndices++] = value;
00612       }
00613     }
00614     //cout << "Building row " << rowID << endl;
00615     A->InsertGlobalValues(rowID, numIndices, values, indices);
00616   }
00617 
00618   delete [] indices;
00619   delete [] values;
00620   double insertTime = timer.ElapsedTime();
00621   timer.ResetStartTime();
00622   A->FillComplete(false);
00623   double fillCompleteTime = timer.ElapsedTime();
00624 
00625   if (verbose)
00626     cout << "Time to insert matrix values = " << insertTime << endl
00627    << "Time to complete fill        = " << fillCompleteTime << endl;
00628   if (summary) {
00629     if (comm.NumProc()==1) cout << "InsertTime" << '\t';
00630     cout << insertTime << endl;
00631     if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
00632     cout << fillCompleteTime << endl;
00633   }
00634 
00635   if (nrhs<=1) {  
00636     b = new Epetra_Vector(*map);
00637     bt = new Epetra_Vector(*map);
00638     xexact = new Epetra_Vector(*map);
00639   }
00640   else {
00641     b = new Epetra_MultiVector(*map, nrhs);
00642     bt = new Epetra_MultiVector(*map, nrhs);
00643     xexact = new Epetra_MultiVector(*map, nrhs);
00644   }
00645 
00646   xexact->Random(); // Fill xexact with random values
00647 
00648   A->Multiply(false, *xexact, *b);
00649   A->Multiply(true, *xexact, *bt);
00650 
00651   return;
00652 }
00653 
00654 
00655 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00656 // 
00657 // nx      (In) - number of grid points in x direction
00658 // ny      (In) - number of grid points in y direction
00659 //   The total number of equations will be nx*ny ordered such that the x direction changes
00660 //   most rapidly: 
00661 //      First equation is at point (0,0)
00662 //      Second at                  (1,0)
00663 //       ...
00664 //      nx equation at             (nx-1,0)
00665 //      nx+1st equation at         (0,1)
00666 
00667 // numPoints (In) - number of points in finite difference stencil
00668 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00669 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00670 //   A standard 5-point finite difference stencil would be described as:
00671 //     numPoints = 5
00672 //     xoff = [-1, 1, 0,  0, 0]
00673 //     yoff = [ 0, 0, 0, -1, 1]
00674 
00675 // nsizes  (In) - Length of element size list used to create variable block map and matrix
00676 // sizes   (In) - integer list of element sizes of length nsizes
00677 //    The map associated with this VbrMatrix will be created by cycling through the sizes list.
00678 //    For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
00679 //    of 2, 4, 3, 2, 4, 3, ...
00680 
00681 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00682 
00683 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00684 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00685 // A      (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
00686 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00687 //                diagonal will be slightly diag dominant.
00688 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00689 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00690 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00691 
00692 // Note: Caller of this function is responsible for deleting all output objects.
00693 
00694 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00695       int * xoff, int * yoff,
00696       int nsizes, int * sizes,
00697       const Epetra_Comm  &comm, bool verbose, bool summary, 
00698       Epetra_BlockMap *& map, 
00699       Epetra_VbrMatrix *& A, 
00700       Epetra_Vector *& b, 
00701       Epetra_Vector *& bt,
00702       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00703   
00704   Epetra_MultiVector * b1, * bt1, * xexact1;
00705   
00706   GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00707          xoff, yoff, nsizes, sizes,
00708          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00709 
00710   b = dynamic_cast<Epetra_Vector *>(b1);
00711   bt = dynamic_cast<Epetra_Vector *>(bt1);
00712   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00713 
00714   return;
00715 }
00716 
00717 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00718       int * xoff, int * yoff, 
00719       int nsizes, int * sizes, int nrhs,
00720       const Epetra_Comm  &comm, bool verbose, bool summary, 
00721       Epetra_BlockMap *& map, 
00722       Epetra_VbrMatrix *& A, 
00723       Epetra_MultiVector *& b, 
00724       Epetra_MultiVector *& bt,
00725       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00726 
00727   int i;
00728 
00729   // Determine my global IDs
00730   long long * myGlobalElements;
00731   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00732 
00733   int numMyElements = numNodesX*numNodesY;
00734   
00735   Epetra_Map ptMap((long long)-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00736   delete [] myGlobalElements;
00737 
00738   Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
00739   for (i=0; i<numMyElements; i++) 
00740     elementSizes[i] = sizes[ptMap.GID64(i)%nsizes]; // cycle through sizes array
00741 
00742   map = new Epetra_BlockMap((long long)-1, numMyElements, ptMap.MyGlobalElements64(), elementSizes.Values(),
00743           ptMap.IndexBase64(), ptMap.Comm());
00744 
00745   int profile = 0; if (StaticProfile) profile = numPoints;
00746   
00747 // FIXME: Won't compile until Epetra_VbrMatrix is modified.
00748 #if 0
00749   int j;
00750   long long numGlobalEquations = ptMap.NumGlobalElements64();
00751 
00752   if (MakeLocalOnly) 
00753     A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
00754   else
00755     A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
00756 
00757   long long * indices = new long long[numPoints];
00758 
00759   // This section of code creates a vector of random values that will be used to create
00760   // light-weight dense matrices to pass into the VbrMatrix construction process.
00761 
00762   int maxElementSize = 0;
00763   for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
00764 
00765   Epetra_LocalMap lmap((long long)maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
00766   Epetra_Vector randvec(lmap);
00767   randvec.Random();
00768   randvec.Scale(-1.0); // Make value negative
00769   int nx = numNodesX*numProcsX;
00770 
00771 
00772   for (i=0; i<numMyElements; i++) {
00773     long long rowID = map->GID64(i);
00774     int numIndices = 0;
00775     int rowDim = sizes[rowID%nsizes];
00776     for (j=0; j<numPoints; j++) {
00777       long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00778       if (colID>-1 && colID<numGlobalEquations)
00779   indices[numIndices++] = colID;
00780     }
00781       
00782     A->BeginInsertGlobalValues(rowID, numIndices, indices);
00783     
00784     for (j=0; j < numIndices; j++) {
00785       int colDim = sizes[indices[j]%nsizes];
00786       A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
00787     }
00788     A->EndSubmitEntries();
00789   }
00790 
00791   delete [] indices;
00792 
00793   A->FillComplete();
00794 
00795   // Compute the InvRowSums of the matrix rows
00796   Epetra_Vector invRowSums(A->RowMap());
00797   Epetra_Vector rowSums(A->RowMap());
00798   A->InvRowSums(invRowSums);
00799   rowSums.Reciprocal(invRowSums);
00800 
00801   // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
00802   int numBlockDiagonalEntries;
00803   int * rowColDims;
00804   int * diagoffsets = map->FirstPointInElementList();
00805   A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
00806   for (i=0; i< numBlockDiagonalEntries; i++) {
00807     double * diagVals;
00808     int diagLDA;
00809     A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
00810     int rowDim = map->ElementSize(i);
00811     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
00812   }
00813 
00814   if (nrhs<=1) {  
00815     b = new Epetra_Vector(*map);
00816     bt = new Epetra_Vector(*map);
00817     xexact = new Epetra_Vector(*map);
00818   }
00819   else {
00820     b = new Epetra_MultiVector(*map, nrhs);
00821     bt = new Epetra_MultiVector(*map, nrhs);
00822     xexact = new Epetra_MultiVector(*map, nrhs);
00823   }
00824 
00825   xexact->Random(); // Fill xexact with random values
00826 
00827   A->Multiply(false, *xexact, *b);
00828   A->Multiply(true, *xexact, *bt);
00829 
00830 #endif // EPETRA_NO_32BIT_GLOBAL_INDICES
00831 
00832   return;
00833 }
00834 
00835 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00836             int myPID, long long * & myGlobalElements) {
00837 
00838   myGlobalElements = new long long[numNodesX*numNodesY];
00839   int myProcX = myPID%numProcsX;
00840   int myProcY = myPID/numProcsX;
00841   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
00842   for (int j=0; j<numNodesY; j++) {
00843     for (int i=0; i<numNodesX; i++) {
00844       myGlobalElements[j*numNodesX+i] = curGID+i;
00845     }
00846     curGID+=numNodesX*numProcsX;
00847   }
00848   //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
00849   
00850   return;
00851 }
00852 
00853 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00854         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00855 
00856   Epetra_MultiVector z(*b);
00857   Epetra_MultiVector r(*b);
00858   Epetra_SerialDenseVector resvec(b->NumVectors());
00859 
00860   //Timings
00861   Epetra_Flops flopcounter;
00862   A->SetFlopCounter(flopcounter);
00863   Epetra_Time timer(A->Comm());
00864   std::string statdyn =        "dynamic";
00865   if (StaticProfile) statdyn = "static ";
00866 
00867   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00868     
00869     bool TransA = (j==1 || j==3);
00870     std::string contig = "without";
00871     if (j>1) contig =    "with   ";
00872     
00873 #ifdef EPETRA_SHORT_PERFTEST
00874     int kstart = 1;
00875 #else
00876     int kstart = 0;
00877 #endif
00878     for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
00879       
00880       std::string oldnew = "old";
00881       if (k>0) oldnew =    "new";
00882 
00883       if (j==2) A->OptimizeStorage();
00884 
00885       flopcounter.ResetFlops();
00886       timer.ResetStartTime();
00887 
00888       if (k==0) {
00889   //10 matvecs
00890 #ifndef EPETRA_SHORT_PERFTEST
00891   for( int i = 0; i < 10; ++i )
00892     A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
00893 #endif
00894       }
00895       else {
00896   //10 matvecs
00897   for( int i = 0; i < 10; ++i )
00898     A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
00899       }
00900       
00901       double elapsed_time = timer.ElapsedTime();
00902       double total_flops = A->Flops();
00903 
00904       // Compute residual
00905       if (TransA)
00906   r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00907       else
00908   r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00909 
00910       r.Norm2(resvec.Values());
00911       
00912       if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00913       double MFLOPs = total_flops/elapsed_time/1000000.0;
00914       if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
00915       << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00916       if (summary) {
00917   if (A->Comm().NumProc()==1) {
00918     if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
00919     else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
00920   }
00921   cout << MFLOPs << endl;
00922       }
00923     }
00924   }
00925   return;
00926 }
00927 #ifdef EPETRA_HAVE_JADMATRIX
00928 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00929         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00930 
00931   Epetra_MultiVector z(*b);
00932   Epetra_MultiVector r(*b);
00933   Epetra_SerialDenseVector resvec(b->NumVectors());
00934 
00935   //Timings
00936   Epetra_Flops flopcounter;
00937   A->SetFlopCounter(flopcounter);
00938   Epetra_Time timer(A->Comm());
00939 
00940   for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
00941     
00942     bool TransA = (j==1);
00943     A->SetUseTranspose(TransA);
00944     flopcounter.ResetFlops();
00945     timer.ResetStartTime();
00946 
00947     //10 matvecs
00948     for( int i = 0; i < 10; ++i )
00949       A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
00950     
00951     double elapsed_time = timer.ElapsedTime();
00952     double total_flops = A->Flops();
00953     
00954     // Compute residual
00955     if (TransA)
00956       r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00957     else
00958       r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00959     
00960     r.Norm2(resvec.Values());
00961     
00962     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00963     double MFLOPs = total_flops/elapsed_time/1000000.0;
00964     if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
00965           << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00966     if (summary) {
00967       if (A->Comm().NumProc()==1) {
00968   if (TransA) cout << "TransMv" << '\t';
00969   else cout << "NoTransMv" << '\t';
00970       }
00971       cout << MFLOPs << endl;
00972     }
00973   }
00974   return;
00975 }
00976 #endif
00977 //=========================================================================================
00978 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00979           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00980           bool StaticProfile, bool verbose, bool summary) {
00981 
00982   if (L->NoDiagonal()) {
00983     bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
00984     btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
00985   }
00986   if (U->NoDiagonal()) {
00987     bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
00988     btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
00989   }
00990 
00991   Epetra_MultiVector z(*bL);
00992   Epetra_MultiVector r(*bL);
00993   Epetra_SerialDenseVector resvec(bL->NumVectors());
00994 
00995   //Timings
00996   Epetra_Flops flopcounter;
00997   L->SetFlopCounter(flopcounter);
00998   U->SetFlopCounter(flopcounter);
00999   Epetra_Time timer(L->Comm());
01000   std::string statdyn =        "dynamic";
01001   if (StaticProfile) statdyn = "static ";
01002 
01003   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
01004     
01005     bool TransA = (j==1 || j==3);
01006     std::string contig = "without";
01007     if (j>1) contig =    "with   ";
01008     
01009     if (j==2) {
01010       L->OptimizeStorage();
01011       U->OptimizeStorage();
01012     }
01013 
01014     flopcounter.ResetFlops();
01015     timer.ResetStartTime();
01016     
01017     //10 lower solves
01018     bool Upper = false;
01019     bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
01020     Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
01021     for( int i = 0; i < 10; ++i )
01022       L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01023       
01024     double elapsed_time = timer.ElapsedTime();
01025     double total_flops = L->Flops();
01026 
01027     // Compute residual
01028     r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
01029     r.Norm2(resvec.Values());
01030 
01031     if (resvec.NormInf()>0.000001) {
01032       cout << "resvec = " << resvec << endl;
01033       cout << "z = " << z << endl;
01034       cout << "xexactL = " << *xexactL << endl;
01035       cout << "r = " << r << endl;
01036     }
01037       
01038     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01039     double MFLOPs = total_flops/elapsed_time/1000000.0;
01040     if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
01041           << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
01042     if (summary) {
01043       if (L->Comm().NumProc()==1) {
01044   if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01045   else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01046       }
01047       cout << MFLOPs << endl;
01048     }
01049     flopcounter.ResetFlops();
01050     timer.ResetStartTime();
01051     
01052     //10 upper solves
01053     Upper = true;
01054     UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
01055     b = TransA ? btU : bU;  // solve with the appropriate b vector
01056     for( int i = 0; i < 10; ++i )
01057       U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01058       
01059     elapsed_time = timer.ElapsedTime();
01060     total_flops = U->Flops();
01061 
01062     // Compute residual
01063     r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
01064     r.Norm2(resvec.Values());
01065 
01066     if (resvec.NormInf()>0.001) {
01067       cout << "U = " << *U << endl;
01068       //cout << "resvec = " << resvec << endl;
01069       cout << "z = " << z << endl;
01070       cout << "xexactU = " << *xexactU << endl;
01071       //cout << "r = " << r << endl;
01072       cout << "b = " << *b << endl;
01073     }
01074 
01075       
01076     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01077     MFLOPs = total_flops/elapsed_time/1000000.0;
01078     if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
01079           << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
01080     if (summary) {
01081       if (L->Comm().NumProc()==1) {
01082   if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01083   else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01084       }
01085       cout << MFLOPs << endl;
01086     }
01087   }
01088   return;
01089 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines