Epetra Package Browser (Single Doxygen Collection) Development
test/BasicPerfTest/cxx_main.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //               Epetra: Linear Algebra Services Package
00005 //                 Copyright 2011 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 
00043 #define EPETRA_HAVE_JADMATRIX
00044 #define EPETRA_VERY_SHORT_PERFTEST
00045 #define EPETRA_HAVE_STATICPROFILE
00046 #include "Epetra_Map.h"
00047 #include "Epetra_LocalMap.h"
00048 #include "Epetra_BlockMap.h"
00049 #include "Epetra_Time.h"
00050 #include "Epetra_CrsMatrix.h"
00051 #include "Epetra_VbrMatrix.h"
00052 #include "Epetra_Vector.h"
00053 #include "Epetra_IntVector.h"
00054 #include "Epetra_MultiVector.h"
00055 #include "Epetra_IntSerialDenseVector.h"
00056 #include "Epetra_SerialDenseVector.h"
00057 #include "Epetra_Flops.h"
00058 #ifdef EPETRA_MPI
00059 #include "Epetra_MpiComm.h"
00060 #include "mpi.h"
00061 #else
00062 #include "Epetra_SerialComm.h"
00063 #endif
00064 #include "../epetra_test_err.h"
00065 #include "Epetra_Version.h"
00066 #ifdef EPETRA_HAVE_JADMATRIX
00067 #include "Epetra_JadMatrix.h"
00068 #endif
00069 
00070 // prototypes
00071 
00072 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00073       int * xoff, int * yoff,
00074       const Epetra_Comm  &comm, bool verbose, bool summary,
00075       Epetra_Map *& map,
00076       Epetra_CrsMatrix *& A,
00077       Epetra_Vector *& b,
00078       Epetra_Vector *& bt,
00079       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00080 
00081 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00082       int * xoff, int * yoff, int nrhs,
00083       const Epetra_Comm  &comm, bool verbose, bool summary,
00084       Epetra_Map *& map,
00085       Epetra_CrsMatrix *& A,
00086       Epetra_MultiVector *& b,
00087       Epetra_MultiVector *& bt,
00088       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00089 
00090 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00091       int * xoff, int * yoff,
00092       int nsizes, int * sizes,
00093       const Epetra_Comm  &comm, bool verbose, bool summary,
00094       Epetra_BlockMap *& map,
00095       Epetra_VbrMatrix *& A,
00096       Epetra_Vector *& b,
00097       Epetra_Vector *& bt,
00098       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00099 
00100 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00101       int * xoff, int * yoff,
00102       int nsizes, int * sizes, int nrhs,
00103       const Epetra_Comm  &comm, bool verbose, bool summary,
00104       Epetra_BlockMap *& map,
00105       Epetra_VbrMatrix *& A,
00106       Epetra_MultiVector *& b,
00107       Epetra_MultiVector *& bt,
00108       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00109 
00110 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00111             int myPID, int * & myGlobalElements);
00112 
00113 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00114         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00115 #ifdef EPETRA_HAVE_JADMATRIX
00116 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00117         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00118 #endif
00119 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL,
00120           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU,
00121           bool StaticProfile, bool verbose, bool summary);
00122 int main(int argc, char *argv[])
00123 {
00124   int ierr = 0;
00125   double elapsed_time;
00126   double total_flops;
00127   double MFLOPs;
00128 
00129 
00130 #ifdef EPETRA_MPI
00131 
00132   // Initialize MPI
00133   MPI_Init(&argc,&argv);
00134   Epetra_MpiComm comm( MPI_COMM_WORLD );
00135 #else
00136   Epetra_SerialComm comm;
00137 #endif
00138 
00139   bool verbose = false;
00140   bool summary = false;
00141 
00142   // Check if we should print verbose results to standard out
00143   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
00144 
00145   // Check if we should print verbose results to standard out
00146   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
00147 
00148   if(argc < 6) {
00149     cerr << "Usage: " << argv[0]
00150          << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
00151          << "where:" << endl
00152          << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
00153          << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
00154          << "NumProcX          - Number of processors to use in X direction" << endl
00155          << "NumProcY          - Number of processors to use in Y direction" << endl
00156          << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
00157          << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
00158          << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
00159    << " Serial example:" << endl
00160          << argv[0] << " 16 12 1 1 25 -v" << endl
00161    << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
00162    << " MPI example:" << endl
00163          << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
00164    << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
00165    << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
00166          << endl;
00167     return(1);
00168 
00169   }
00170     //char tmp;
00171     //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
00172     //if (comm.MyPID()==0) cin >> tmp;
00173     //comm.Barrier();
00174 
00175   comm.SetTracebackMode(0); // This should shut down any error traceback reporting
00176   if (verbose && comm.MyPID()==0)
00177     cout << Epetra_Version() << endl << endl;
00178   if (summary && comm.MyPID()==0) {
00179     if (comm.NumProc()==1)
00180       cout << Epetra_Version() << endl << endl;
00181     else
00182       cout << endl << endl; // Print two blank line to keep output columns lined up
00183   }
00184 
00185   if (verbose) cout << comm <<endl;
00186 
00187 
00188   // Redefine verbose to only print on PE 0
00189 
00190   if (verbose && comm.MyPID()!=0) verbose = false;
00191   if (summary && comm.MyPID()!=0) summary = false;
00192 
00193   int numNodesX = atoi(argv[1]);
00194   int numNodesY = atoi(argv[2]);
00195   int numProcsX = atoi(argv[3]);
00196   int numProcsY = atoi(argv[4]);
00197   int numPoints = atoi(argv[5]);
00198 
00199   if (verbose || (summary && comm.NumProc()==1)) {
00200     cout << " Number of local nodes in X direction  = " << numNodesX << endl
00201    << " Number of local nodes in Y direction  = " << numNodesY << endl
00202    << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
00203    << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
00204    << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
00205    << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
00206    << " Number of Processors in X direction   = " << numProcsX << endl
00207    << " Number of Processors in Y direction   = " << numProcsY << endl
00208    << " Number of Points in stencil           = " << numPoints << endl << endl;
00209   }
00210   // Print blank line to keep output columns lined up
00211   if (summary && comm.NumProc()>1)
00212     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
00213 
00214   if (numProcsX*numProcsY!=comm.NumProc()) {
00215     cerr << "Number of processors = " << comm.NumProc() << endl
00216    << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
00217     return(1);
00218   }
00219 
00220   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
00221     cerr << "Number of points specified = " << numPoints << endl
00222    << " is not 5, 9, 25" << endl << endl;
00223     return(1);
00224   }
00225 
00226   if (numNodesX*numNodesY<=0) {
00227     cerr << "Product of number of nodes is <= zero" << endl << endl;
00228     return(1);
00229   }
00230 
00231   Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
00232   Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
00233   if (numPoints==5) {
00234 
00235      // Generate a 5-point 2D Finite Difference matrix
00236     Xoff.Size(5);
00237     Yoff.Size(5);
00238     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0;
00239     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1;
00240 
00241      // Generate a 2-point 2D Lower triangular Finite Difference matrix
00242     XLoff.Size(2);
00243     YLoff.Size(2);
00244     XLoff[0] = -1; XLoff[1] =  0;
00245     YLoff[0] =  0; YLoff[1] = -1;
00246 
00247      // Generate a 3-point 2D upper triangular Finite Difference matrix
00248     XUoff.Size(3);
00249     YUoff.Size(3);
00250     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0;
00251     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
00252   }
00253   else if (numPoints==9) {
00254     // Generate a 9-point 2D Finite Difference matrix
00255     Xoff.Size(9);
00256     Yoff.Size(9);
00257     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1;
00258     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1;
00259     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1;
00260     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0;
00261     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1;
00262     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1;
00263 
00264     // Generate a 5-point lower triangular 2D Finite Difference matrix
00265     XLoff.Size(5);
00266     YLoff.Size(5);
00267     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1;
00268     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1;
00269     XLoff[3] = -1;  XLoff[4] =  0;
00270     YLoff[3] =  0;  YLoff[4] =  0;
00271 
00272     // Generate a 4-point upper triangular 2D Finite Difference matrix
00273     XUoff.Size(4);
00274     YUoff.Size(4);
00275     XUoff[0] =  1;
00276     YUoff[0] =  0;
00277     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1;
00278     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1;
00279 
00280   }
00281   else {
00282     // Generate a 25-point 2D Finite Difference matrix
00283     Xoff.Size(25);
00284     Yoff.Size(25);
00285     int xi = 0, yi = 0;
00286     int xo = -2, yo = -2;
00287     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00288     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
00289     xo = -2, yo++;
00290     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00291     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
00292     xo = -2, yo++;
00293     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00294     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
00295     xo = -2, yo++;
00296     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00297     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
00298     xo = -2, yo++;
00299     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00300     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ;
00301 
00302     // Generate a 13-point lower triangular 2D Finite Difference matrix
00303     XLoff.Size(13);
00304     YLoff.Size(13);
00305     xi = 0, yi = 0;
00306     xo = -2, yo = -2;
00307     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00308     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00309     xo = -2, yo++;
00310     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00311     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00312     xo = -2, yo++;
00313     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00314     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00315 
00316     // Generate a 13-point upper triangular 2D Finite Difference matrix
00317     XUoff.Size(13);
00318     YUoff.Size(13);
00319     xi = 0, yi = 0;
00320     xo = 0, yo = 0;
00321     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00322     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
00323     xo = -2, yo++;
00324     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00325     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
00326     xo = -2, yo++;
00327     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00328     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ;
00329 
00330   }
00331 
00332   Epetra_Map * map;
00333   Epetra_Map * mapL;
00334   Epetra_Map * mapU;
00335   Epetra_CrsMatrix * A;
00336   Epetra_CrsMatrix * L;
00337   Epetra_CrsMatrix * U;
00338   Epetra_MultiVector * b;
00339   Epetra_MultiVector * bt;
00340   Epetra_MultiVector * xexact;
00341   Epetra_MultiVector * bL;
00342   Epetra_MultiVector * btL;
00343   Epetra_MultiVector * xexactL;
00344   Epetra_MultiVector * bU;
00345   Epetra_MultiVector * btU;
00346   Epetra_MultiVector * xexactU;
00347   Epetra_SerialDenseVector resvec(0);
00348 
00349   //Timings
00350   Epetra_Flops flopcounter;
00351   Epetra_Time timer(comm);
00352 
00353 #ifdef EPETRA_VERY_SHORT_PERFTEST
00354   int jstop = 1;
00355 #elif EPETRA_SHORT_PERFTEST
00356   int jstop = 1;
00357 #else
00358   int jstop = 2;
00359 #endif
00360   for (int j=0; j<jstop; j++) {
00361     for (int k=1; k<17; k++) {
00362 #ifdef EPETRA_VERY_SHORT_PERFTEST
00363       if (k<3 || (k%4==0 && k<9)) {
00364 #elif EPETRA_SHORT_PERFTEST
00365       if (k<6 || k%4==0) {
00366 #else
00367       if (k<7 || k%2==0) {
00368 #endif
00369       int nrhs=k;
00370       if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
00371 
00372       bool StaticProfile = (j!=0);
00373       if (verbose) {
00374         if (StaticProfile) cout << " static profile\n";
00375         else cout << " dynamic profile\n";
00376       }
00377 
00378       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00379        Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
00380        map, A, b, bt, xexact, StaticProfile, false);
00381 
00382 
00383 #ifdef EPETRA_HAVE_JADMATRIX
00384 
00385       timer.ResetStartTime();
00386       Epetra_JadMatrix JA(*A);
00387       elapsed_time = timer.ElapsedTime();
00388       if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
00389 
00390       //cout << "A = " << *A << endl;
00391       //cout << "JA = " << JA << endl;
00392 
00393       runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
00394 
00395 #endif
00396       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
00397 
00398       delete A;
00399       delete b;
00400       delete bt;
00401       delete xexact;
00402 
00403       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
00404        XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
00405        mapL, L, bL, btL, xexactL, StaticProfile, true);
00406 
00407 
00408       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
00409        XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
00410        mapU, U, bU, btU, xexactU, StaticProfile, true);
00411 
00412 
00413       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
00414 
00415       delete L;
00416       delete bL;
00417       delete btL;
00418       delete xexactL;
00419       delete mapL;
00420 
00421       delete U;
00422       delete bU;
00423       delete btU;
00424       delete xexactU;
00425       delete mapU;
00426 
00427       Epetra_MultiVector q(*map, nrhs);
00428       Epetra_MultiVector z(q);
00429       Epetra_MultiVector r(q);
00430 
00431       delete map;
00432       q.SetFlopCounter(flopcounter);
00433       z.SetFlopCounter(q);
00434       r.SetFlopCounter(q);
00435 
00436       resvec.Resize(nrhs);
00437 
00438 
00439       flopcounter.ResetFlops();
00440       timer.ResetStartTime();
00441 
00442       //10 norms
00443       for( int i = 0; i < 10; ++i )
00444   q.Norm2( resvec.Values() );
00445 
00446       elapsed_time = timer.ElapsedTime();
00447       total_flops = q.Flops();
00448       MFLOPs = total_flops/elapsed_time/1000000.0;
00449       if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
00450 
00451       if (summary) {
00452   if (comm.NumProc()==1) cout << "Norm2" << '\t';
00453   cout << MFLOPs << endl;
00454       }
00455 
00456       flopcounter.ResetFlops();
00457       timer.ResetStartTime();
00458 
00459       //10 dot's
00460       for( int i = 0; i < 10; ++i )
00461   q.Dot(z, resvec.Values());
00462 
00463       elapsed_time = timer.ElapsedTime();
00464       total_flops = q.Flops();
00465       MFLOPs = total_flops/elapsed_time/1000000.0;
00466       if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
00467 
00468       if (summary) {
00469   if (comm.NumProc()==1) cout << "DotProd" << '\t';
00470   cout << MFLOPs << endl;
00471       }
00472 
00473       flopcounter.ResetFlops();
00474       timer.ResetStartTime();
00475 
00476       //10 dot's
00477       for( int i = 0; i < 10; ++i )
00478   q.Update(1.0, z, 1.0, r, 0.0);
00479 
00480       elapsed_time = timer.ElapsedTime();
00481       total_flops = q.Flops();
00482       MFLOPs = total_flops/elapsed_time/1000000.0;
00483       if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
00484 
00485       if (summary) {
00486   if (comm.NumProc()==1) cout << "Update" << '\t';
00487   cout << MFLOPs << endl;
00488       }
00489     }
00490     }
00491   }
00492 #ifdef EPETRA_MPI
00493   MPI_Finalize() ;
00494 #endif
00495 
00496 return ierr ;
00497 }
00498 
00499 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00500 //
00501 // nx      (In) - number of grid points in x direction
00502 // ny      (In) - number of grid points in y direction
00503 //   The total number of equations will be nx*ny ordered such that the x direction changes
00504 //   most rapidly:
00505 //      First equation is at point (0,0)
00506 //      Second at                  (1,0)
00507 //       ...
00508 //      nx equation at             (nx-1,0)
00509 //      nx+1st equation at         (0,1)
00510 
00511 // numPoints (In) - number of points in finite difference stencil
00512 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00513 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00514 //   A standard 5-point finite difference stencil would be described as:
00515 //     numPoints = 5
00516 //     xoff = [-1, 1, 0,  0, 0]
00517 //     yoff = [ 0, 0, 0, -1, 1]
00518 
00519 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00520 
00521 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00522 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00523 // A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
00524 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00525 //                diagonal will be slightly diag dominant.
00526 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00527 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00528 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00529 
00530 // Note: Caller of this function is responsible for deleting all output objects.
00531 
00532 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00533       int * xoff, int * yoff,
00534       const Epetra_Comm  &comm, bool verbose, bool summary,
00535       Epetra_Map *& map,
00536       Epetra_CrsMatrix *& A,
00537       Epetra_Vector *& b,
00538       Epetra_Vector *& bt,
00539       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00540 
00541   Epetra_MultiVector * b1, * bt1, * xexact1;
00542   
00543   GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00544          xoff, yoff, 1, comm, verbose, summary,
00545          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00546 
00547   b = dynamic_cast<Epetra_Vector *>(b1);
00548   bt = dynamic_cast<Epetra_Vector *>(bt1);
00549   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00550 
00551   return;
00552 }
00553 
00554 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00555       int * xoff, int * yoff, int nrhs,
00556       const Epetra_Comm  &comm, bool verbose, bool summary,
00557       Epetra_Map *& map,
00558       Epetra_CrsMatrix *& A,
00559       Epetra_MultiVector *& b,
00560       Epetra_MultiVector *& bt,
00561       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00562 
00563   Epetra_Time timer(comm);
00564   // Determine my global IDs
00565   int * myGlobalElements;
00566   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00567 
00568   int numMyEquations = numNodesX*numNodesY;
00569 
00570   map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00571   delete [] myGlobalElements;
00572 
00573   int numGlobalEquations = map->NumGlobalElements();
00574 
00575   int profile = 0; if (StaticProfile) profile = numPoints;
00576 
00577 #ifdef EPETRA_HAVE_STATICPROFILE
00578 
00579   if (MakeLocalOnly)
00580     A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
00581   else
00582     A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
00583 
00584 #else
00585 
00586   if (MakeLocalOnly)
00587     A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
00588   else
00589     A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
00590 
00591 #endif
00592 
00593   int * indices = new int[numPoints];
00594   double * values = new double[numPoints];
00595 
00596   double dnumPoints = (double) numPoints;
00597   int nx = numNodesX*numProcsX;
00598 
00599   for (int i=0; i<numMyEquations; i++) {
00600 
00601     int rowID = map->GID(i);
00602     int numIndices = 0;
00603 
00604     for (int j=0; j<numPoints; j++) {
00605       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00606       if (colID>-1 && colID<numGlobalEquations) {
00607   indices[numIndices] = colID;
00608   double value = - ((double) rand())/ ((double) RAND_MAX);
00609   if (colID==rowID)
00610     values[numIndices++] = dnumPoints - value; // Make diagonal dominant
00611   else
00612     values[numIndices++] = value;
00613       }
00614     }
00615     //cout << "Building row " << rowID << endl;
00616     A->InsertGlobalValues(rowID, numIndices, values, indices);
00617   }
00618 
00619   delete [] indices;
00620   delete [] values;
00621   double insertTime = timer.ElapsedTime();
00622   timer.ResetStartTime();
00623   A->FillComplete(false);
00624   double fillCompleteTime = timer.ElapsedTime();
00625 
00626   if (verbose)
00627     cout << "Time to insert matrix values = " << insertTime << endl
00628    << "Time to complete fill        = " << fillCompleteTime << endl;
00629   if (summary) {
00630     if (comm.NumProc()==1) cout << "InsertTime" << '\t';
00631     cout << insertTime << endl;
00632     if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
00633     cout << fillCompleteTime << endl;
00634   }
00635 
00636   if (nrhs<=1) {
00637     b = new Epetra_Vector(*map);
00638     bt = new Epetra_Vector(*map);
00639     xexact = new Epetra_Vector(*map);
00640   }
00641   else {
00642     b = new Epetra_MultiVector(*map, nrhs);
00643     bt = new Epetra_MultiVector(*map, nrhs);
00644     xexact = new Epetra_MultiVector(*map, nrhs);
00645   }
00646 
00647   xexact->Random(); // Fill xexact with random values
00648 
00649   A->Multiply(false, *xexact, *b);
00650   A->Multiply(true, *xexact, *bt);
00651 
00652   return;
00653 }
00654 
00655 
00656 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00657 //
00658 // nx      (In) - number of grid points in x direction
00659 // ny      (In) - number of grid points in y direction
00660 //   The total number of equations will be nx*ny ordered such that the x direction changes
00661 //   most rapidly:
00662 //      First equation is at point (0,0)
00663 //      Second at                  (1,0)
00664 //       ...
00665 //      nx equation at             (nx-1,0)
00666 //      nx+1st equation at         (0,1)
00667 
00668 // numPoints (In) - number of points in finite difference stencil
00669 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00670 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00671 //   A standard 5-point finite difference stencil would be described as:
00672 //     numPoints = 5
00673 //     xoff = [-1, 1, 0,  0, 0]
00674 //     yoff = [ 0, 0, 0, -1, 1]
00675 
00676 // nsizes  (In) - Length of element size list used to create variable block map and matrix
00677 // sizes   (In) - integer list of element sizes of length nsizes
00678 //    The map associated with this VbrMatrix will be created by cycling through the sizes list.
00679 //    For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
00680 //    of 2, 4, 3, 2, 4, 3, ...
00681 
00682 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00683 
00684 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00685 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00686 // A      (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
00687 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00688 //                diagonal will be slightly diag dominant.
00689 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00690 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00691 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00692 
00693 // Note: Caller of this function is responsible for deleting all output objects.
00694 
00695 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00696       int * xoff, int * yoff,
00697       int nsizes, int * sizes,
00698       const Epetra_Comm  &comm, bool verbose, bool summary,
00699       Epetra_BlockMap *& map,
00700       Epetra_VbrMatrix *& A,
00701       Epetra_Vector *& b,
00702       Epetra_Vector *& bt,
00703       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00704   
00705   Epetra_MultiVector * b1, * bt1, * xexact1;
00706   
00707   GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00708          xoff, yoff, nsizes, sizes,
00709          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00710 
00711   b = dynamic_cast<Epetra_Vector *>(b1);
00712   bt = dynamic_cast<Epetra_Vector *>(bt1);
00713   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00714 
00715   return;
00716 }
00717 
00718 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints,
00719       int * xoff, int * yoff,
00720       int nsizes, int * sizes, int nrhs,
00721       const Epetra_Comm  &comm, bool verbose, bool summary,
00722       Epetra_BlockMap *& map,
00723       Epetra_VbrMatrix *& A,
00724       Epetra_MultiVector *& b,
00725       Epetra_MultiVector *& bt,
00726       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00727 
00728   int i, j;
00729 
00730   // Determine my global IDs
00731   int * myGlobalElements;
00732   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00733 
00734   int numMyElements = numNodesX*numNodesY;
00735 
00736   Epetra_Map ptMap(-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00737   delete [] myGlobalElements;
00738 
00739   int numGlobalEquations = ptMap.NumGlobalElements();
00740 
00741   Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
00742   for (i=0; i<numMyElements; i++)
00743     elementSizes[i] = sizes[ptMap.GID(i)%nsizes]; // cycle through sizes array
00744 
00745   map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(),
00746           ptMap.IndexBase(), ptMap.Comm());
00747 
00748   int profile = 0; if (StaticProfile) profile = numPoints;
00749 
00750   if (MakeLocalOnly)
00751     A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
00752   else
00753     A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
00754 
00755   int * indices = new int[numPoints];
00756 
00757   // This section of code creates a vector of random values that will be used to create
00758   // light-weight dense matrices to pass into the VbrMatrix construction process.
00759 
00760   int maxElementSize = 0;
00761   for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
00762 
00763   Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
00764   Epetra_Vector randvec(lmap);
00765   randvec.Random();
00766   randvec.Scale(-1.0); // Make value negative
00767   int nx = numNodesX*numProcsX;
00768 
00769 
00770   for (i=0; i<numMyElements; i++) {
00771     int rowID = map->GID(i);
00772     int numIndices = 0;
00773     int rowDim = sizes[rowID%nsizes];
00774     for (j=0; j<numPoints; j++) {
00775       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00776       if (colID>-1 && colID<numGlobalEquations)
00777   indices[numIndices++] = colID;
00778     }
00779       
00780     A->BeginInsertGlobalValues(rowID, numIndices, indices);
00781     
00782     for (j=0; j < numIndices; j++) {
00783       int colDim = sizes[indices[j]%nsizes];
00784       A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
00785     }
00786     A->EndSubmitEntries();
00787   }
00788 
00789   delete [] indices;
00790 
00791   A->FillComplete();
00792 
00793   // Compute the InvRowSums of the matrix rows
00794   Epetra_Vector invRowSums(A->RowMap());
00795   Epetra_Vector rowSums(A->RowMap());
00796   A->InvRowSums(invRowSums);
00797   rowSums.Reciprocal(invRowSums);
00798 
00799   // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
00800   int numBlockDiagonalEntries;
00801   int * rowColDims;
00802   int * diagoffsets = map->FirstPointInElementList();
00803   A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
00804   for (i=0; i< numBlockDiagonalEntries; i++) {
00805     double * diagVals;
00806     int diagLDA;
00807     A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
00808     int rowDim = map->ElementSize(i);
00809     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
00810   }
00811 
00812   if (nrhs<=1) {
00813     b = new Epetra_Vector(*map);
00814     bt = new Epetra_Vector(*map);
00815     xexact = new Epetra_Vector(*map);
00816   }
00817   else {
00818     b = new Epetra_MultiVector(*map, nrhs);
00819     bt = new Epetra_MultiVector(*map, nrhs);
00820     xexact = new Epetra_MultiVector(*map, nrhs);
00821   }
00822 
00823   xexact->Random(); // Fill xexact with random values
00824 
00825   A->Multiply(false, *xexact, *b);
00826   A->Multiply(true, *xexact, *bt);
00827 
00828   return;
00829 }
00830 
00831 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00832             int myPID, int * & myGlobalElements) {
00833 
00834   myGlobalElements = new int[numNodesX*numNodesY];
00835   int myProcX = myPID%numProcsX;
00836   int myProcY = myPID/numProcsX;
00837   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
00838   for (int j=0; j<numNodesY; j++) {
00839     for (int i=0; i<numNodesX; i++) {
00840       myGlobalElements[j*numNodesX+i] = curGID+i;
00841     }
00842     curGID+=numNodesX*numProcsX;
00843   }
00844   //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
00845 
00846   return;
00847 }
00848 
00849 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00850         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00851 
00852   Epetra_MultiVector z(*b);
00853   Epetra_MultiVector r(*b);
00854   Epetra_SerialDenseVector resvec(b->NumVectors());
00855 
00856   //Timings
00857   Epetra_Flops flopcounter;
00858   A->SetFlopCounter(flopcounter);
00859   Epetra_Time timer(A->Comm());
00860   std::string statdyn =        "dynamic";
00861   if (StaticProfile) statdyn = "static ";
00862 
00863   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00864 
00865     bool TransA = (j==1 || j==3);
00866     std::string contig = "without";
00867     if (j>1) contig =    "with   ";
00868 
00869 #ifdef EPETRA_SHORT_PERFTEST
00870     int kstart = 1;
00871 #else
00872     int kstart = 0;
00873 #endif
00874     for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
00875 
00876       std::string oldnew = "old";
00877       if (k>0) oldnew =    "new";
00878 
00879       if (j==2) A->OptimizeStorage();
00880 
00881       flopcounter.ResetFlops();
00882       timer.ResetStartTime();
00883 
00884       if (k==0) {
00885   //10 matvecs
00886 #ifndef EPETRA_SHORT_PERFTEST
00887   for( int i = 0; i < 10; ++i )
00888     A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
00889 #endif
00890       }
00891       else {
00892   //10 matvecs
00893   for( int i = 0; i < 10; ++i )
00894     A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
00895       }
00896 
00897       double elapsed_time = timer.ElapsedTime();
00898       double total_flops = A->Flops();
00899 
00900       // Compute residual
00901       if (TransA)
00902   r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00903       else
00904   r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00905 
00906       r.Norm2(resvec.Values());
00907 
00908       if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00909       double MFLOPs = total_flops/elapsed_time/1000000.0;
00910       if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
00911       << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00912       if (summary) {
00913   if (A->Comm().NumProc()==1) {
00914     if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
00915     else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
00916   }
00917   cout << MFLOPs << endl;
00918       }
00919     }
00920   }
00921   return;
00922 }
00923 #ifdef EPETRA_HAVE_JADMATRIX
00924 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00925         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00926 
00927   Epetra_MultiVector z(*b);
00928   Epetra_MultiVector r(*b);
00929   Epetra_SerialDenseVector resvec(b->NumVectors());
00930 
00931   //Timings
00932   Epetra_Flops flopcounter;
00933   A->SetFlopCounter(flopcounter);
00934   Epetra_Time timer(A->Comm());
00935 
00936   for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
00937 
00938     bool TransA = (j==1);
00939     A->SetUseTranspose(TransA);
00940     flopcounter.ResetFlops();
00941     timer.ResetStartTime();
00942 
00943     //10 matvecs
00944     for( int i = 0; i < 10; ++i )
00945       A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
00946 
00947     double elapsed_time = timer.ElapsedTime();
00948     double total_flops = A->Flops();
00949 
00950     // Compute residual
00951     if (TransA)
00952       r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00953     else
00954       r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00955 
00956     r.Norm2(resvec.Values());
00957 
00958     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00959     double MFLOPs = total_flops/elapsed_time/1000000.0;
00960     if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
00961           << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00962     if (summary) {
00963       if (A->Comm().NumProc()==1) {
00964   if (TransA) cout << "TransMv" << '\t';
00965   else cout << "NoTransMv" << '\t';
00966       }
00967       cout << MFLOPs << endl;
00968     }
00969   }
00970   return;
00971 }
00972 #endif
00973 //=========================================================================================
00974 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL,
00975           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU,
00976           bool StaticProfile, bool verbose, bool summary) {
00977 
00978   if (L->NoDiagonal()) {
00979     bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
00980     btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
00981   }
00982   if (U->NoDiagonal()) {
00983     bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
00984     btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
00985   }
00986 
00987   Epetra_MultiVector z(*bL);
00988   Epetra_MultiVector r(*bL);
00989   Epetra_SerialDenseVector resvec(bL->NumVectors());
00990 
00991   //Timings
00992   Epetra_Flops flopcounter;
00993   L->SetFlopCounter(flopcounter);
00994   U->SetFlopCounter(flopcounter);
00995   Epetra_Time timer(L->Comm());
00996   std::string statdyn =        "dynamic";
00997   if (StaticProfile) statdyn = "static ";
00998 
00999   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
01000 
01001     bool TransA = (j==1 || j==3);
01002     std::string contig = "without";
01003     if (j>1) contig =    "with   ";
01004 
01005     if (j==2) {
01006       L->OptimizeStorage();
01007       U->OptimizeStorage();
01008     }
01009 
01010     flopcounter.ResetFlops();
01011     timer.ResetStartTime();
01012 
01013     //10 lower solves
01014     bool Upper = false;
01015     bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
01016     Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
01017     for( int i = 0; i < 10; ++i )
01018       L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01019 
01020     double elapsed_time = timer.ElapsedTime();
01021     double total_flops = L->Flops();
01022 
01023     // Compute residual
01024     r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
01025     r.Norm2(resvec.Values());
01026 
01027     if (resvec.NormInf()>0.000001) {
01028       cout << "resvec = " << resvec << endl;
01029       cout << "z = " << z << endl;
01030       cout << "xexactL = " << *xexactL << endl;
01031       cout << "r = " << r << endl;
01032     }
01033 
01034     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01035     double MFLOPs = total_flops/elapsed_time/1000000.0;
01036     if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
01037           << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
01038     if (summary) {
01039       if (L->Comm().NumProc()==1) {
01040   if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01041   else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01042       }
01043       cout << MFLOPs << endl;
01044     }
01045     flopcounter.ResetFlops();
01046     timer.ResetStartTime();
01047 
01048     //10 upper solves
01049     Upper = true;
01050     UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
01051     b = TransA ? btU : bU;  // solve with the appropriate b vector
01052     for( int i = 0; i < 10; ++i )
01053       U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01054 
01055     elapsed_time = timer.ElapsedTime();
01056     total_flops = U->Flops();
01057 
01058     // Compute residual
01059     r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
01060     r.Norm2(resvec.Values());
01061 
01062     if (resvec.NormInf()>0.001) {
01063       cout << "U = " << *U << endl;
01064       //cout << "resvec = " << resvec << endl;
01065       cout << "z = " << z << endl;
01066       cout << "xexactU = " << *xexactU << endl;
01067       //cout << "r = " << r << endl;
01068       cout << "b = " << *b << endl;
01069     }
01070 
01071 
01072     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01073     MFLOPs = total_flops/elapsed_time/1000000.0;
01074     if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
01075           << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
01076     if (summary) {
01077       if (L->Comm().NumProc()==1) {
01078   if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01079   else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01080       }
01081       cout << MFLOPs << endl;
01082     }
01083   }
01084   return;
01085 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines