Epetra Package Browser (Single Doxygen Collection) Development
test/BasicPerfTest/cxx_main.cpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Epetra: Linear Algebra Services Package 
00005 //                 Copyright 2001 Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 
00043 #define EPETRA_HAVE_JADMATRIX
00044 #define EPETRA_VERY_SHORT_PERFTEST
00045 #define EPETRA_HAVE_STATICPROFILE
00046 #include "Epetra_Map.h"
00047 #include "Epetra_LocalMap.h"
00048 #include "Epetra_BlockMap.h"
00049 #include "Epetra_Time.h"
00050 #include "Epetra_CrsMatrix.h"
00051 #include "Epetra_VbrMatrix.h"
00052 #include "Epetra_Vector.h"
00053 #include "Epetra_IntVector.h"
00054 #include "Epetra_MultiVector.h"
00055 #include "Epetra_IntSerialDenseVector.h"
00056 #include "Epetra_SerialDenseVector.h"
00057 #include "Epetra_Flops.h"
00058 #ifdef EPETRA_MPI
00059 #include "Epetra_MpiComm.h"
00060 #include "mpi.h"
00061 #else
00062 #include "Epetra_SerialComm.h"
00063 #endif
00064 #include "../epetra_test_err.h"
00065 #include "Epetra_Version.h"
00066 #ifdef EPETRA_HAVE_JADMATRIX
00067 #include "Epetra_JadMatrix.h"
00068 #endif
00069 
00070 // prototypes
00071 
00072 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00073       int * xoff, int * yoff,
00074       const Epetra_Comm  &comm, bool verbose, bool summary, 
00075       Epetra_Map *& map, 
00076       Epetra_CrsMatrix *& A, 
00077       Epetra_Vector *& b, 
00078       Epetra_Vector *& bt,
00079       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00080 
00081 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00082       int * xoff, int * yoff, int nrhs,
00083       const Epetra_Comm  &comm, bool verbose, bool summary, 
00084       Epetra_Map *& map, 
00085       Epetra_CrsMatrix *& A, 
00086       Epetra_MultiVector *& b, 
00087       Epetra_MultiVector *& bt,
00088       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00089  
00090 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00091       int * xoff, int * yoff,
00092       int nsizes, int * sizes,
00093       const Epetra_Comm  &comm, bool verbose, bool summary, 
00094       Epetra_BlockMap *& map, 
00095       Epetra_VbrMatrix *& A, 
00096       Epetra_Vector *& b, 
00097       Epetra_Vector *& bt,
00098       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00099 
00100 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00101       int * xoff, int * yoff, 
00102       int nsizes, int * sizes, int nrhs,
00103       const Epetra_Comm  &comm, bool verbose, bool summary, 
00104       Epetra_BlockMap *& map, 
00105       Epetra_VbrMatrix *& A, 
00106       Epetra_MultiVector *& b, 
00107       Epetra_MultiVector *& bt,
00108       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00109 
00110 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00111             int myPID, int * & myGlobalElements);
00112 
00113 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00114         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00115 #ifdef EPETRA_HAVE_JADMATRIX
00116 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00117         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00118 #endif
00119 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00120           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00121           bool StaticProfile, bool verbose, bool summary);
00122 int main(int argc, char *argv[])
00123 {
00124   int ierr = 0;
00125   double elapsed_time;
00126   double total_flops;
00127   double MFLOPs;
00128     
00129 
00130 #ifdef EPETRA_MPI
00131 
00132   // Initialize MPI
00133   MPI_Init(&argc,&argv);
00134   Epetra_MpiComm comm( MPI_COMM_WORLD );
00135 #else
00136   Epetra_SerialComm comm;
00137 #endif
00138 
00139   bool verbose = false;
00140   bool summary = false;
00141 
00142   // Check if we should print verbose results to standard out
00143   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
00144 
00145   // Check if we should print verbose results to standard out
00146   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
00147 
00148   if(argc < 6) {
00149     cerr << "Usage: " << argv[0]
00150          << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
00151          << "where:" << endl
00152          << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
00153          << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
00154          << "NumProcX          - Number of processors to use in X direction" << endl
00155          << "NumProcY          - Number of processors to use in Y direction" << endl
00156          << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
00157          << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
00158          << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
00159    << " Serial example:" << endl
00160          << argv[0] << " 16 12 1 1 25 -v" << endl
00161    << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
00162    << " MPI example:" << endl
00163          << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
00164    << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
00165    << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
00166          << endl;
00167     return(1);
00168 
00169   }
00170     //char tmp;
00171     //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
00172     //if (comm.MyPID()==0) cin >> tmp;
00173     //comm.Barrier();
00174 
00175   comm.SetTracebackMode(0); // This should shut down any error traceback reporting
00176   if (verbose && comm.MyPID()==0)
00177     cout << Epetra_Version() << endl << endl;
00178   if (summary && comm.MyPID()==0) {
00179     if (comm.NumProc()==1)
00180       cout << Epetra_Version() << endl << endl;
00181     else
00182       cout << endl << endl; // Print two blank line to keep output columns lined up
00183   }
00184 
00185   if (verbose) cout << comm <<endl;
00186 
00187 
00188   // Redefine verbose to only print on PE 0
00189 
00190   if (verbose && comm.MyPID()!=0) verbose = false;
00191   if (summary && comm.MyPID()!=0) summary = false;
00192 
00193   int numNodesX = atoi(argv[1]);
00194   int numNodesY = atoi(argv[2]);
00195   int numProcsX = atoi(argv[3]);
00196   int numProcsY = atoi(argv[4]);
00197   int numPoints = atoi(argv[5]);
00198 
00199   if (verbose || (summary && comm.NumProc()==1)) {
00200     cout << " Number of local nodes in X direction  = " << numNodesX << endl
00201    << " Number of local nodes in Y direction  = " << numNodesY << endl
00202    << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
00203    << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
00204    << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
00205    << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
00206    << " Number of Processors in X direction   = " << numProcsX << endl
00207    << " Number of Processors in Y direction   = " << numProcsY << endl
00208    << " Number of Points in stencil           = " << numPoints << endl << endl;
00209   }
00210   // Print blank line to keep output columns lined up
00211   if (summary && comm.NumProc()>1)
00212     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
00213 
00214   if (numProcsX*numProcsY!=comm.NumProc()) {
00215     cerr << "Number of processors = " << comm.NumProc() << endl
00216    << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
00217     return(1);
00218   }
00219 
00220   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
00221     cerr << "Number of points specified = " << numPoints << endl
00222    << " is not 5, 9, 25" << endl << endl;
00223     return(1);
00224   }
00225 
00226   if (numNodesX*numNodesY<=0) {
00227     cerr << "Product of number of nodes is <= zero" << endl << endl;
00228     return(1);
00229   }
00230 
00231   Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
00232   Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
00233   if (numPoints==5) {
00234 
00235      // Generate a 5-point 2D Finite Difference matrix
00236     Xoff.Size(5);
00237     Yoff.Size(5);
00238     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0; 
00239     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; 
00240 
00241      // Generate a 2-point 2D Lower triangular Finite Difference matrix
00242     XLoff.Size(2);
00243     YLoff.Size(2);
00244     XLoff[0] = -1; XLoff[1] =  0; 
00245     YLoff[0] =  0; YLoff[1] = -1;
00246 
00247      // Generate a 3-point 2D upper triangular Finite Difference matrix
00248     XUoff.Size(3);
00249     YUoff.Size(3);
00250     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0; 
00251     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
00252   }
00253   else if (numPoints==9) {
00254     // Generate a 9-point 2D Finite Difference matrix
00255     Xoff.Size(9);
00256     Yoff.Size(9);
00257     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1; 
00258     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1; 
00259     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1; 
00260     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0; 
00261     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1; 
00262     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1; 
00263 
00264     // Generate a 5-point lower triangular 2D Finite Difference matrix
00265     XLoff.Size(5);
00266     YLoff.Size(5);
00267     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1; 
00268     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1; 
00269     XLoff[3] = -1;  XLoff[4] =  0; 
00270     YLoff[3] =  0;  YLoff[4] =  0;
00271 
00272     // Generate a 4-point upper triangular 2D Finite Difference matrix
00273     XUoff.Size(4);
00274     YUoff.Size(4);
00275     XUoff[0] =  1; 
00276     YUoff[0] =  0; 
00277     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1; 
00278     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1; 
00279 
00280   }
00281   else {
00282     // Generate a 25-point 2D Finite Difference matrix
00283     Xoff.Size(25);
00284     Yoff.Size(25);
00285     int xi = 0, yi = 0;
00286     int xo = -2, yo = -2;
00287     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00288     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00289     xo = -2, yo++;
00290     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00291     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00292     xo = -2, yo++;
00293     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00294     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00295     xo = -2, yo++;
00296     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00297     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00298     xo = -2, yo++;
00299     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00300     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00301 
00302     // Generate a 13-point lower triangular 2D Finite Difference matrix
00303     XLoff.Size(13);
00304     YLoff.Size(13);
00305     xi = 0, yi = 0;
00306     xo = -2, yo = -2;
00307     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00308     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00309     xo = -2, yo++;
00310     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00311     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00312     xo = -2, yo++;
00313     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00314     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00315 
00316     // Generate a 13-point upper triangular 2D Finite Difference matrix
00317     XUoff.Size(13);
00318     YUoff.Size(13);
00319     xi = 0, yi = 0;
00320     xo = 0, yo = 0;
00321     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00322     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00323     xo = -2, yo++;
00324     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00325     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00326     xo = -2, yo++;
00327     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00328     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00329 
00330   }
00331 
00332   Epetra_Map * map;
00333   Epetra_Map * mapL;
00334   Epetra_Map * mapU;
00335   Epetra_CrsMatrix * A;
00336   Epetra_CrsMatrix * L;
00337   Epetra_CrsMatrix * U;
00338   Epetra_MultiVector * b;
00339   Epetra_MultiVector * bt;
00340   Epetra_MultiVector * xexact;
00341   Epetra_MultiVector * bL;
00342   Epetra_MultiVector * btL;
00343   Epetra_MultiVector * xexactL;
00344   Epetra_MultiVector * bU;
00345   Epetra_MultiVector * btU;
00346   Epetra_MultiVector * xexactU;
00347   Epetra_SerialDenseVector resvec(0);
00348 
00349   //Timings
00350   Epetra_Flops flopcounter;
00351   Epetra_Time timer(comm);
00352 
00353 #ifdef EPETRA_VERY_SHORT_PERFTEST
00354   int jstop = 1;
00355 #elif EPETRA_SHORT_PERFTEST
00356   int jstop = 1;
00357 #else
00358   int jstop = 2;
00359 #endif
00360   for (int j=0; j<jstop; j++) {
00361     for (int k=1; k<17; k++) {
00362 #ifdef EPETRA_VERY_SHORT_PERFTEST
00363       if (k<3 || (k%4==0 && k<9)) {
00364 #elif EPETRA_SHORT_PERFTEST
00365       if (k<6 || k%4==0) {
00366 #else
00367       if (k<7 || k%2==0) {
00368 #endif
00369       int nrhs=k;
00370       if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
00371 
00372       bool StaticProfile = (j!=0);
00373       if (verbose) 
00374   if (StaticProfile) cout << " static profile\n";
00375   else cout << " dynamic profile\n";
00376       
00377       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00378        Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
00379        map, A, b, bt, xexact, StaticProfile, false);
00380 
00381       
00382 #ifdef EPETRA_HAVE_JADMATRIX
00383       
00384       timer.ResetStartTime();
00385       Epetra_JadMatrix JA(*A);
00386       elapsed_time = timer.ElapsedTime();
00387       if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
00388 
00389       //cout << "A = " << *A << endl;
00390       //cout << "JA = " << JA << endl;
00391 
00392       runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
00393 
00394 #endif
00395       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
00396 
00397       delete A;
00398       delete b;
00399       delete bt; 
00400       delete xexact;
00401 
00402       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
00403        XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
00404        mapL, L, bL, btL, xexactL, StaticProfile, true);
00405       
00406 
00407       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
00408        XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
00409        mapU, U, bU, btU, xexactU, StaticProfile, true);
00410       
00411 
00412       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
00413 
00414       delete L;
00415       delete bL;
00416       delete btL; 
00417       delete xexactL;
00418       delete mapL;
00419 
00420       delete U;
00421       delete bU;
00422       delete btU; 
00423       delete xexactU;
00424       delete mapU;
00425 
00426       Epetra_MultiVector q(*map, nrhs);
00427       Epetra_MultiVector z(q);
00428       Epetra_MultiVector r(q);
00429       
00430       delete map;
00431       q.SetFlopCounter(flopcounter);
00432       z.SetFlopCounter(q);
00433       r.SetFlopCounter(q);
00434 
00435       resvec.Resize(nrhs);
00436       
00437     
00438       flopcounter.ResetFlops();
00439       timer.ResetStartTime();
00440 
00441       //10 norms
00442       for( int i = 0; i < 10; ++i )
00443   q.Norm2( resvec.Values() );
00444 
00445       elapsed_time = timer.ElapsedTime();
00446       total_flops = q.Flops();
00447       MFLOPs = total_flops/elapsed_time/1000000.0;
00448       if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
00449       
00450       if (summary) {
00451   if (comm.NumProc()==1) cout << "Norm2" << '\t';
00452   cout << MFLOPs << endl;
00453       }
00454       
00455       flopcounter.ResetFlops();
00456       timer.ResetStartTime();
00457       
00458       //10 dot's
00459       for( int i = 0; i < 10; ++i )
00460   q.Dot(z, resvec.Values());
00461       
00462       elapsed_time = timer.ElapsedTime();
00463       total_flops = q.Flops();
00464       MFLOPs = total_flops/elapsed_time/1000000.0;
00465       if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
00466       
00467       if (summary) {
00468   if (comm.NumProc()==1) cout << "DotProd" << '\t';
00469   cout << MFLOPs << endl;
00470       }
00471       
00472       flopcounter.ResetFlops();
00473       timer.ResetStartTime();
00474       
00475       //10 dot's
00476       for( int i = 0; i < 10; ++i )
00477   q.Update(1.0, z, 1.0, r, 0.0);
00478       
00479       elapsed_time = timer.ElapsedTime();
00480       total_flops = q.Flops();
00481       MFLOPs = total_flops/elapsed_time/1000000.0;
00482       if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
00483       
00484       if (summary) {
00485   if (comm.NumProc()==1) cout << "Update" << '\t';
00486   cout << MFLOPs << endl;
00487       }
00488     }
00489     }
00490   }
00491 #ifdef EPETRA_MPI
00492   MPI_Finalize() ;
00493 #endif
00494 
00495 return ierr ;
00496 }
00497 
00498 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00499 // 
00500 // nx      (In) - number of grid points in x direction
00501 // ny      (In) - number of grid points in y direction
00502 //   The total number of equations will be nx*ny ordered such that the x direction changes
00503 //   most rapidly: 
00504 //      First equation is at point (0,0)
00505 //      Second at                  (1,0)
00506 //       ...
00507 //      nx equation at             (nx-1,0)
00508 //      nx+1st equation at         (0,1)
00509 
00510 // numPoints (In) - number of points in finite difference stencil
00511 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00512 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00513 //   A standard 5-point finite difference stencil would be described as:
00514 //     numPoints = 5
00515 //     xoff = [-1, 1, 0,  0, 0]
00516 //     yoff = [ 0, 0, 0, -1, 1]
00517 
00518 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00519 
00520 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00521 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00522 // A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
00523 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00524 //                diagonal will be slightly diag dominant.
00525 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00526 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00527 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00528 
00529 // Note: Caller of this function is responsible for deleting all output objects.
00530 
00531 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00532       int * xoff, int * yoff,
00533       const Epetra_Comm  &comm, bool verbose, bool summary, 
00534       Epetra_Map *& map, 
00535       Epetra_CrsMatrix *& A, 
00536       Epetra_Vector *& b, 
00537       Epetra_Vector *& bt,
00538       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00539 
00540   Epetra_MultiVector * b1, * bt1, * xexact1;
00541   
00542   GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, 
00543          xoff, yoff, 1, comm, verbose, summary, 
00544          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00545 
00546   b = dynamic_cast<Epetra_Vector *>(b1);
00547   bt = dynamic_cast<Epetra_Vector *>(bt1);
00548   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00549 
00550   return;
00551 }
00552 
00553 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00554       int * xoff, int * yoff, int nrhs,
00555       const Epetra_Comm  &comm, bool verbose, bool summary,
00556       Epetra_Map *& map, 
00557       Epetra_CrsMatrix *& A, 
00558       Epetra_MultiVector *& b, 
00559       Epetra_MultiVector *& bt,
00560       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00561   
00562   Epetra_Time timer(comm);
00563   // Determine my global IDs
00564   int * myGlobalElements;
00565   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00566 
00567   int numMyEquations = numNodesX*numNodesY;
00568   
00569   map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00570   delete [] myGlobalElements;
00571 
00572   int numGlobalEquations = map->NumGlobalElements();
00573 
00574   int profile = 0; if (StaticProfile) profile = numPoints;
00575 
00576 #ifdef EPETRA_HAVE_STATICPROFILE
00577 
00578   if (MakeLocalOnly) 
00579     A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
00580   else
00581     A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
00582 
00583 #else
00584 
00585   if (MakeLocalOnly) 
00586     A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
00587   else
00588     A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
00589 
00590 #endif
00591 
00592   int * indices = new int[numPoints];
00593   double * values = new double[numPoints];
00594 
00595   double dnumPoints = (double) numPoints;
00596   int nx = numNodesX*numProcsX;
00597 
00598   for (int i=0; i<numMyEquations; i++) {
00599 
00600     int rowID = map->GID(i);
00601     int numIndices = 0;
00602 
00603     for (int j=0; j<numPoints; j++) {
00604       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00605       if (colID>-1 && colID<numGlobalEquations) {
00606   indices[numIndices] = colID;
00607   double value = - ((double) rand())/ ((double) RAND_MAX);
00608   if (colID==rowID)
00609     values[numIndices++] = dnumPoints - value; // Make diagonal dominant
00610   else
00611     values[numIndices++] = value;
00612       }
00613     }
00614     //cout << "Building row " << rowID << endl;
00615     A->InsertGlobalValues(rowID, numIndices, values, indices);
00616   }
00617 
00618   delete [] indices;
00619   delete [] values;
00620   double insertTime = timer.ElapsedTime();
00621   timer.ResetStartTime();
00622   A->FillComplete(false);
00623   double fillCompleteTime = timer.ElapsedTime();
00624 
00625   if (verbose)
00626     cout << "Time to insert matrix values = " << insertTime << endl
00627    << "Time to complete fill        = " << fillCompleteTime << endl;
00628   if (summary) {
00629     if (comm.NumProc()==1) cout << "InsertTime" << '\t';
00630     cout << insertTime << endl;
00631     if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
00632     cout << fillCompleteTime << endl;
00633   }
00634 
00635   if (nrhs<=1) {  
00636     b = new Epetra_Vector(*map);
00637     bt = new Epetra_Vector(*map);
00638     xexact = new Epetra_Vector(*map);
00639   }
00640   else {
00641     b = new Epetra_MultiVector(*map, nrhs);
00642     bt = new Epetra_MultiVector(*map, nrhs);
00643     xexact = new Epetra_MultiVector(*map, nrhs);
00644   }
00645 
00646   xexact->Random(); // Fill xexact with random values
00647 
00648   A->Multiply(false, *xexact, *b);
00649   A->Multiply(true, *xexact, *bt);
00650 
00651   return;
00652 }
00653 
00654 
00655 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00656 // 
00657 // nx      (In) - number of grid points in x direction
00658 // ny      (In) - number of grid points in y direction
00659 //   The total number of equations will be nx*ny ordered such that the x direction changes
00660 //   most rapidly: 
00661 //      First equation is at point (0,0)
00662 //      Second at                  (1,0)
00663 //       ...
00664 //      nx equation at             (nx-1,0)
00665 //      nx+1st equation at         (0,1)
00666 
00667 // numPoints (In) - number of points in finite difference stencil
00668 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00669 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00670 //   A standard 5-point finite difference stencil would be described as:
00671 //     numPoints = 5
00672 //     xoff = [-1, 1, 0,  0, 0]
00673 //     yoff = [ 0, 0, 0, -1, 1]
00674 
00675 // nsizes  (In) - Length of element size list used to create variable block map and matrix
00676 // sizes   (In) - integer list of element sizes of length nsizes
00677 //    The map associated with this VbrMatrix will be created by cycling through the sizes list.
00678 //    For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
00679 //    of 2, 4, 3, 2, 4, 3, ...
00680 
00681 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00682 
00683 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00684 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00685 // A      (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
00686 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00687 //                diagonal will be slightly diag dominant.
00688 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00689 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00690 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00691 
00692 // Note: Caller of this function is responsible for deleting all output objects.
00693 
00694 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00695       int * xoff, int * yoff,
00696       int nsizes, int * sizes,
00697       const Epetra_Comm  &comm, bool verbose, bool summary, 
00698       Epetra_BlockMap *& map, 
00699       Epetra_VbrMatrix *& A, 
00700       Epetra_Vector *& b, 
00701       Epetra_Vector *& bt,
00702       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00703   
00704   Epetra_MultiVector * b1, * bt1, * xexact1;
00705   
00706   GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00707          xoff, yoff, nsizes, sizes,
00708          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00709 
00710   b = dynamic_cast<Epetra_Vector *>(b1);
00711   bt = dynamic_cast<Epetra_Vector *>(bt1);
00712   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00713 
00714   return;
00715 }
00716 
00717 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00718       int * xoff, int * yoff, 
00719       int nsizes, int * sizes, int nrhs,
00720       const Epetra_Comm  &comm, bool verbose, bool summary, 
00721       Epetra_BlockMap *& map, 
00722       Epetra_VbrMatrix *& A, 
00723       Epetra_MultiVector *& b, 
00724       Epetra_MultiVector *& bt,
00725       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00726 
00727   int i, j;
00728 
00729   // Determine my global IDs
00730   int * myGlobalElements;
00731   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00732 
00733   int numMyElements = numNodesX*numNodesY;
00734   
00735   Epetra_Map ptMap(-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00736   delete [] myGlobalElements;
00737 
00738   int numGlobalEquations = ptMap.NumGlobalElements();
00739 
00740   Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
00741   for (i=0; i<numMyElements; i++) 
00742     elementSizes[i] = sizes[ptMap.GID(i)%nsizes]; // cycle through sizes array
00743 
00744   map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(),
00745           ptMap.IndexBase(), ptMap.Comm());
00746 
00747   int profile = 0; if (StaticProfile) profile = numPoints;
00748   
00749   if (MakeLocalOnly) 
00750     A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
00751   else
00752     A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
00753 
00754   int * indices = new int[numPoints];
00755 
00756   // This section of code creates a vector of random values that will be used to create
00757   // light-weight dense matrices to pass into the VbrMatrix construction process.
00758 
00759   int maxElementSize = 0;
00760   for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
00761 
00762   Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
00763   Epetra_Vector randvec(lmap);
00764   randvec.Random();
00765   randvec.Scale(-1.0); // Make value negative
00766   int nx = numNodesX*numProcsX;
00767 
00768 
00769   for (i=0; i<numMyElements; i++) {
00770     int rowID = map->GID(i);
00771     int numIndices = 0;
00772     int rowDim = sizes[rowID%nsizes];
00773     for (j=0; j<numPoints; j++) {
00774       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00775       if (colID>-1 && colID<numGlobalEquations)
00776   indices[numIndices++] = colID;
00777     }
00778       
00779     A->BeginInsertGlobalValues(rowID, numIndices, indices);
00780     
00781     for (j=0; j < numIndices; j++) {
00782       int colDim = sizes[indices[j]%nsizes];
00783       A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
00784     }
00785     A->EndSubmitEntries();
00786   }
00787 
00788   delete [] indices;
00789 
00790   A->FillComplete();
00791 
00792   // Compute the InvRowSums of the matrix rows
00793   Epetra_Vector invRowSums(A->RowMap());
00794   Epetra_Vector rowSums(A->RowMap());
00795   A->InvRowSums(invRowSums);
00796   rowSums.Reciprocal(invRowSums);
00797 
00798   // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
00799   int numBlockDiagonalEntries;
00800   int * rowColDims;
00801   int * diagoffsets = map->FirstPointInElementList();
00802   A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
00803   for (i=0; i< numBlockDiagonalEntries; i++) {
00804     double * diagVals;
00805     int diagLDA;
00806     A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
00807     int rowDim = map->ElementSize(i);
00808     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
00809   }
00810 
00811   if (nrhs<=1) {  
00812     b = new Epetra_Vector(*map);
00813     bt = new Epetra_Vector(*map);
00814     xexact = new Epetra_Vector(*map);
00815   }
00816   else {
00817     b = new Epetra_MultiVector(*map, nrhs);
00818     bt = new Epetra_MultiVector(*map, nrhs);
00819     xexact = new Epetra_MultiVector(*map, nrhs);
00820   }
00821 
00822   xexact->Random(); // Fill xexact with random values
00823 
00824   A->Multiply(false, *xexact, *b);
00825   A->Multiply(true, *xexact, *bt);
00826 
00827   return;
00828 }
00829 
00830 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00831             int myPID, int * & myGlobalElements) {
00832 
00833   myGlobalElements = new int[numNodesX*numNodesY];
00834   int myProcX = myPID%numProcsX;
00835   int myProcY = myPID/numProcsX;
00836   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
00837   for (int j=0; j<numNodesY; j++) {
00838     for (int i=0; i<numNodesX; i++) {
00839       myGlobalElements[j*numNodesX+i] = curGID+i;
00840     }
00841     curGID+=numNodesX*numProcsX;
00842   }
00843   //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
00844   
00845   return;
00846 }
00847 
00848 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00849         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00850 
00851   Epetra_MultiVector z(*b);
00852   Epetra_MultiVector r(*b);
00853   Epetra_SerialDenseVector resvec(b->NumVectors());
00854 
00855   //Timings
00856   Epetra_Flops flopcounter;
00857   A->SetFlopCounter(flopcounter);
00858   Epetra_Time timer(A->Comm());
00859   std::string statdyn =        "dynamic";
00860   if (StaticProfile) statdyn = "static ";
00861 
00862   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00863     
00864     bool TransA = (j==1 || j==3);
00865     std::string contig = "without";
00866     if (j>1) contig =    "with   ";
00867     
00868 #ifdef EPETRA_SHORT_PERFTEST
00869     int kstart = 1;
00870 #else
00871     int kstart = 0;
00872 #endif
00873     for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
00874       
00875       std::string oldnew = "old";
00876       if (k>0) oldnew =    "new";
00877 
00878       if (j==2) A->OptimizeStorage();
00879 
00880       flopcounter.ResetFlops();
00881       timer.ResetStartTime();
00882 
00883       if (k==0) {
00884   //10 matvecs
00885 #ifndef EPETRA_SHORT_PERFTEST
00886   for( int i = 0; i < 10; ++i )
00887     A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
00888 #endif
00889       }
00890       else {
00891   //10 matvecs
00892   for( int i = 0; i < 10; ++i )
00893     A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
00894       }
00895       
00896       double elapsed_time = timer.ElapsedTime();
00897       double total_flops = A->Flops();
00898 
00899       // Compute residual
00900       if (TransA)
00901   r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00902       else
00903   r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00904 
00905       r.Norm2(resvec.Values());
00906       
00907       if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00908       double MFLOPs = total_flops/elapsed_time/1000000.0;
00909       if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
00910       << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00911       if (summary) {
00912   if (A->Comm().NumProc()==1) {
00913     if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
00914     else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
00915   }
00916   cout << MFLOPs << endl;
00917       }
00918     }
00919   }
00920   return;
00921 }
00922 #ifdef EPETRA_HAVE_JADMATRIX
00923 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00924         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00925 
00926   Epetra_MultiVector z(*b);
00927   Epetra_MultiVector r(*b);
00928   Epetra_SerialDenseVector resvec(b->NumVectors());
00929 
00930   //Timings
00931   Epetra_Flops flopcounter;
00932   A->SetFlopCounter(flopcounter);
00933   Epetra_Time timer(A->Comm());
00934 
00935   for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
00936     
00937     bool TransA = (j==1);
00938     A->SetUseTranspose(TransA);
00939     flopcounter.ResetFlops();
00940     timer.ResetStartTime();
00941 
00942     //10 matvecs
00943     for( int i = 0; i < 10; ++i )
00944       A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
00945     
00946     double elapsed_time = timer.ElapsedTime();
00947     double total_flops = A->Flops();
00948     
00949     // Compute residual
00950     if (TransA)
00951       r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00952     else
00953       r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00954     
00955     r.Norm2(resvec.Values());
00956     
00957     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00958     double MFLOPs = total_flops/elapsed_time/1000000.0;
00959     if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
00960           << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00961     if (summary) {
00962       if (A->Comm().NumProc()==1) {
00963   if (TransA) cout << "TransMv" << '\t';
00964   else cout << "NoTransMv" << '\t';
00965       }
00966       cout << MFLOPs << endl;
00967     }
00968   }
00969   return;
00970 }
00971 #endif
00972 //=========================================================================================
00973 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00974           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00975           bool StaticProfile, bool verbose, bool summary) {
00976 
00977   if (L->NoDiagonal()) {
00978     bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
00979     btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
00980   }
00981   if (U->NoDiagonal()) {
00982     bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
00983     btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
00984   }
00985 
00986   Epetra_MultiVector z(*bL);
00987   Epetra_MultiVector r(*bL);
00988   Epetra_SerialDenseVector resvec(bL->NumVectors());
00989 
00990   //Timings
00991   Epetra_Flops flopcounter;
00992   L->SetFlopCounter(flopcounter);
00993   U->SetFlopCounter(flopcounter);
00994   Epetra_Time timer(L->Comm());
00995   std::string statdyn =        "dynamic";
00996   if (StaticProfile) statdyn = "static ";
00997 
00998   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00999     
01000     bool TransA = (j==1 || j==3);
01001     std::string contig = "without";
01002     if (j>1) contig =    "with   ";
01003     
01004     if (j==2) {
01005       L->OptimizeStorage();
01006       U->OptimizeStorage();
01007     }
01008 
01009     flopcounter.ResetFlops();
01010     timer.ResetStartTime();
01011     
01012     //10 lower solves
01013     bool Upper = false;
01014     bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
01015     Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
01016     for( int i = 0; i < 10; ++i )
01017       L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01018       
01019     double elapsed_time = timer.ElapsedTime();
01020     double total_flops = L->Flops();
01021 
01022     // Compute residual
01023     r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
01024     r.Norm2(resvec.Values());
01025 
01026     if (resvec.NormInf()>0.000001) {
01027       cout << "resvec = " << resvec << endl;
01028       cout << "z = " << z << endl;
01029       cout << "xexactL = " << *xexactL << endl;
01030       cout << "r = " << r << endl;
01031     }
01032       
01033     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01034     double MFLOPs = total_flops/elapsed_time/1000000.0;
01035     if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
01036           << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
01037     if (summary) {
01038       if (L->Comm().NumProc()==1) {
01039   if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01040   else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01041       }
01042       cout << MFLOPs << endl;
01043     }
01044     flopcounter.ResetFlops();
01045     timer.ResetStartTime();
01046     
01047     //10 upper solves
01048     Upper = true;
01049     UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
01050     b = TransA ? btU : bU;  // solve with the appropriate b vector
01051     for( int i = 0; i < 10; ++i )
01052       U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01053       
01054     elapsed_time = timer.ElapsedTime();
01055     total_flops = U->Flops();
01056 
01057     // Compute residual
01058     r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
01059     r.Norm2(resvec.Values());
01060 
01061     if (resvec.NormInf()>0.001) {
01062       cout << "U = " << *U << endl;
01063       //cout << "resvec = " << resvec << endl;
01064       cout << "z = " << z << endl;
01065       cout << "xexactU = " << *xexactU << endl;
01066       //cout << "r = " << r << endl;
01067       cout << "b = " << *b << endl;
01068     }
01069 
01070       
01071     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01072     MFLOPs = total_flops/elapsed_time/1000000.0;
01073     if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
01074           << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
01075     if (summary) {
01076       if (L->Comm().NumProc()==1) {
01077   if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01078   else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01079       }
01080       cout << MFLOPs << endl;
01081     }
01082   }
01083   return;
01084 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines