test/BasicPerfTest/cxx_main.cpp

Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Epetra: Linear Algebra Services Package 
00005 //                 Copyright (2001) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #define EPETRA_HAVE_JADMATRIX
00030 #define EPETRA_VERY_SHORT_PERFTEST
00031 #define EPETRA_HAVE_STATICPROFILE
00032 #include "Epetra_Map.h"
00033 #include "Epetra_LocalMap.h"
00034 #include "Epetra_BlockMap.h"
00035 #include "Epetra_Time.h"
00036 #include "Epetra_CrsMatrix.h"
00037 #include "Epetra_VbrMatrix.h"
00038 #include "Epetra_Vector.h"
00039 #include "Epetra_IntVector.h"
00040 #include "Epetra_MultiVector.h"
00041 #include "Epetra_IntSerialDenseVector.h"
00042 #include "Epetra_SerialDenseVector.h"
00043 #include "Epetra_Flops.h"
00044 #ifdef EPETRA_MPI
00045 #include "Epetra_MpiComm.h"
00046 #include "mpi.h"
00047 #else
00048 #include "Epetra_SerialComm.h"
00049 #endif
00050 #include "../epetra_test_err.h"
00051 #include "Epetra_Version.h"
00052 #ifdef EPETRA_HAVE_JADMATRIX
00053 #include "Epetra_JadMatrix.h"
00054 #endif
00055 
00056 // prototypes
00057 
00058 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00059       int * xoff, int * yoff,
00060       const Epetra_Comm  &comm, bool verbose, bool summary, 
00061       Epetra_Map *& map, 
00062       Epetra_CrsMatrix *& A, 
00063       Epetra_Vector *& b, 
00064       Epetra_Vector *& bt,
00065       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00066 
00067 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00068       int * xoff, int * yoff, int nrhs,
00069       const Epetra_Comm  &comm, bool verbose, bool summary, 
00070       Epetra_Map *& map, 
00071       Epetra_CrsMatrix *& A, 
00072       Epetra_MultiVector *& b, 
00073       Epetra_MultiVector *& bt,
00074       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00075  
00076 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00077       int * xoff, int * yoff,
00078       int nsizes, int * sizes,
00079       const Epetra_Comm  &comm, bool verbose, bool summary, 
00080       Epetra_BlockMap *& map, 
00081       Epetra_VbrMatrix *& A, 
00082       Epetra_Vector *& b, 
00083       Epetra_Vector *& bt,
00084       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00085 
00086 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00087       int * xoff, int * yoff, 
00088       int nsizes, int * sizes, int nrhs,
00089       const Epetra_Comm  &comm, bool verbose, bool summary, 
00090       Epetra_BlockMap *& map, 
00091       Epetra_VbrMatrix *& A, 
00092       Epetra_MultiVector *& b, 
00093       Epetra_MultiVector *& bt,
00094       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00095 
00096 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00097             int myPID, int * & myGlobalElements);
00098 
00099 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00100         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00101 #ifdef EPETRA_HAVE_JADMATRIX
00102 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00103         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00104 #endif
00105 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00106           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00107           bool StaticProfile, bool verbose, bool summary);
00108 int main(int argc, char *argv[])
00109 {
00110   int ierr = 0;
00111   double elapsed_time;
00112   double total_flops;
00113   double MFLOPs;
00114     
00115 
00116 #ifdef EPETRA_MPI
00117 
00118   // Initialize MPI
00119   MPI_Init(&argc,&argv);
00120   Epetra_MpiComm comm( MPI_COMM_WORLD );
00121 #else
00122   Epetra_SerialComm comm;
00123 #endif
00124 
00125   bool verbose = false;
00126   bool summary = false;
00127 
00128   // Check if we should print verbose results to standard out
00129   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
00130 
00131   // Check if we should print verbose results to standard out
00132   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
00133 
00134   if(argc < 6) {
00135     cerr << "Usage: " << argv[0]
00136          << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
00137          << "where:" << endl
00138          << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
00139          << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
00140          << "NumProcX          - Number of processors to use in X direction" << endl
00141          << "NumProcY          - Number of processors to use in Y direction" << endl
00142          << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
00143          << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
00144          << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
00145    << " Serial example:" << endl
00146          << argv[0] << " 16 12 1 1 25 -v" << endl
00147    << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
00148    << " MPI example:" << endl
00149          << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
00150    << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
00151    << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
00152          << endl;
00153     return(1);
00154 
00155   }
00156     //char tmp;
00157     //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
00158     //if (comm.MyPID()==0) cin >> tmp;
00159     //comm.Barrier();
00160 
00161   comm.SetTracebackMode(0); // This should shut down any error traceback reporting
00162   if (verbose && comm.MyPID()==0)
00163     cout << Epetra_Version() << endl << endl;
00164   if (summary && comm.MyPID()==0) {
00165     if (comm.NumProc()==1)
00166       cout << Epetra_Version() << endl << endl;
00167     else
00168       cout << endl << endl; // Print two blank line to keep output columns lined up
00169   }
00170 
00171   if (verbose) cout << comm <<endl;
00172 
00173 
00174   // Redefine verbose to only print on PE 0
00175 
00176   if (verbose && comm.MyPID()!=0) verbose = false;
00177   if (summary && comm.MyPID()!=0) summary = false;
00178 
00179   int numNodesX = atoi(argv[1]);
00180   int numNodesY = atoi(argv[2]);
00181   int numProcsX = atoi(argv[3]);
00182   int numProcsY = atoi(argv[4]);
00183   int numPoints = atoi(argv[5]);
00184 
00185   if (verbose || (summary && comm.NumProc()==1)) {
00186     cout << " Number of local nodes in X direction  = " << numNodesX << endl
00187    << " Number of local nodes in Y direction  = " << numNodesY << endl
00188    << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
00189    << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
00190    << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
00191    << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
00192    << " Number of Processors in X direction   = " << numProcsX << endl
00193    << " Number of Processors in Y direction   = " << numProcsY << endl
00194    << " Number of Points in stencil           = " << numPoints << endl << endl;
00195   }
00196   // Print blank line to keep output columns lined up
00197   if (summary && comm.NumProc()>1)
00198     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
00199 
00200   if (numProcsX*numProcsY!=comm.NumProc()) {
00201     cerr << "Number of processors = " << comm.NumProc() << endl
00202    << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
00203     return(1);
00204   }
00205 
00206   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
00207     cerr << "Number of points specified = " << numPoints << endl
00208    << " is not 5, 9, 25" << endl << endl;
00209     return(1);
00210   }
00211 
00212   if (numNodesX*numNodesY<=0) {
00213     cerr << "Product of number of nodes is <= zero" << endl << endl;
00214     return(1);
00215   }
00216 
00217   Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
00218   Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
00219   if (numPoints==5) {
00220 
00221      // Generate a 5-point 2D Finite Difference matrix
00222     Xoff.Size(5);
00223     Yoff.Size(5);
00224     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0; 
00225     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; 
00226 
00227      // Generate a 2-point 2D Lower triangular Finite Difference matrix
00228     XLoff.Size(2);
00229     YLoff.Size(2);
00230     XLoff[0] = -1; XLoff[1] =  0; 
00231     YLoff[0] =  0; YLoff[1] = -1;
00232 
00233      // Generate a 3-point 2D upper triangular Finite Difference matrix
00234     XUoff.Size(3);
00235     YUoff.Size(3);
00236     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0; 
00237     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
00238   }
00239   else if (numPoints==9) {
00240     // Generate a 9-point 2D Finite Difference matrix
00241     Xoff.Size(9);
00242     Yoff.Size(9);
00243     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1; 
00244     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1; 
00245     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1; 
00246     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0; 
00247     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1; 
00248     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1; 
00249 
00250     // Generate a 5-point lower triangular 2D Finite Difference matrix
00251     XLoff.Size(5);
00252     YLoff.Size(5);
00253     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1; 
00254     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1; 
00255     XLoff[3] = -1;  XLoff[4] =  0; 
00256     YLoff[3] =  0;  YLoff[4] =  0;
00257 
00258     // Generate a 4-point upper triangular 2D Finite Difference matrix
00259     XUoff.Size(4);
00260     YUoff.Size(4);
00261     XUoff[0] =  1; 
00262     YUoff[0] =  0; 
00263     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1; 
00264     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1; 
00265 
00266   }
00267   else {
00268     // Generate a 25-point 2D Finite Difference matrix
00269     Xoff.Size(25);
00270     Yoff.Size(25);
00271     int xi = 0, yi = 0;
00272     int xo = -2, yo = -2;
00273     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00274     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00275     xo = -2, yo++;
00276     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00277     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00278     xo = -2, yo++;
00279     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00280     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00281     xo = -2, yo++;
00282     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00283     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00284     xo = -2, yo++;
00285     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00286     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00287 
00288     // Generate a 13-point lower triangular 2D Finite Difference matrix
00289     XLoff.Size(13);
00290     YLoff.Size(13);
00291     xi = 0, yi = 0;
00292     xo = -2, yo = -2;
00293     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00294     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00295     xo = -2, yo++;
00296     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00297     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00298     xo = -2, yo++;
00299     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00300     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00301 
00302     // Generate a 13-point upper triangular 2D Finite Difference matrix
00303     XUoff.Size(13);
00304     YUoff.Size(13);
00305     xi = 0, yi = 0;
00306     xo = 0, yo = 0;
00307     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00308     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00309     xo = -2, yo++;
00310     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00311     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00312     xo = -2, yo++;
00313     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00314     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00315 
00316   }
00317 
00318   Epetra_Map * map;
00319   Epetra_Map * mapL;
00320   Epetra_Map * mapU;
00321   Epetra_CrsMatrix * A;
00322   Epetra_CrsMatrix * L;
00323   Epetra_CrsMatrix * U;
00324   Epetra_MultiVector * b;
00325   Epetra_MultiVector * bt;
00326   Epetra_MultiVector * xexact;
00327   Epetra_MultiVector * bL;
00328   Epetra_MultiVector * btL;
00329   Epetra_MultiVector * xexactL;
00330   Epetra_MultiVector * bU;
00331   Epetra_MultiVector * btU;
00332   Epetra_MultiVector * xexactU;
00333   Epetra_SerialDenseVector resvec(0);
00334 
00335   //Timings
00336   Epetra_Flops flopcounter;
00337   Epetra_Time timer(comm);
00338 
00339 #ifdef EPETRA_VERY_SHORT_PERFTEST
00340   int jstop = 1;
00341 #elif EPETRA_SHORT_PERFTEST
00342   int jstop = 1;
00343 #else
00344   int jstop = 2;
00345 #endif
00346   for (int j=0; j<jstop; j++) {
00347     for (int k=1; k<17; k++) {
00348 #ifdef EPETRA_VERY_SHORT_PERFTEST
00349       if (k<3 || (k%4==0 && k<9)) {
00350 #elif EPETRA_SHORT_PERFTEST
00351       if (k<6 || k%4==0) {
00352 #else
00353       if (k<7 || k%2==0) {
00354 #endif
00355       int nrhs=k;
00356       if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
00357 
00358       bool StaticProfile = (j==0);
00359       if (verbose) 
00360   if (StaticProfile) cout << " static profile\n";
00361   else cout << " dynamic profile\n";
00362       
00363       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00364        Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
00365        map, A, b, bt, xexact, StaticProfile, false);
00366 
00367       
00368 #ifdef EPETRA_HAVE_JADMATRIX
00369       
00370       timer.ResetStartTime();
00371       Epetra_JadMatrix JA(*A);
00372       elapsed_time = timer.ElapsedTime();
00373       if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
00374 
00375       //cout << "A = " << *A << endl;
00376       //cout << "JA = " << JA << endl;
00377 
00378       runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
00379 
00380 #endif
00381       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
00382 
00383       delete A;
00384       delete b;
00385       delete bt; 
00386       delete xexact;
00387 
00388       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
00389        XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
00390        mapL, L, bL, btL, xexactL, StaticProfile, true);
00391       
00392 
00393       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
00394        XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
00395        mapU, U, bU, btU, xexactU, StaticProfile, true);
00396       
00397 
00398       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
00399 
00400       delete L;
00401       delete bL;
00402       delete btL; 
00403       delete xexactL;
00404       delete mapL;
00405 
00406       delete U;
00407       delete bU;
00408       delete btU; 
00409       delete xexactU;
00410       delete mapU;
00411 
00412       Epetra_MultiVector q(*map, nrhs);
00413       Epetra_MultiVector z(q);
00414       Epetra_MultiVector r(q);
00415       
00416       delete map;
00417       q.SetFlopCounter(flopcounter);
00418       z.SetFlopCounter(q);
00419       r.SetFlopCounter(q);
00420 
00421       resvec.Resize(nrhs);
00422       
00423     
00424       flopcounter.ResetFlops();
00425       timer.ResetStartTime();
00426 
00427       //10 norms
00428       for( int i = 0; i < 10; ++i )
00429   q.Norm2( resvec.Values() );
00430 
00431       elapsed_time = timer.ElapsedTime();
00432       total_flops = q.Flops();
00433       MFLOPs = total_flops/elapsed_time/1000000.0;
00434       if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
00435       
00436       if (summary) {
00437   if (comm.NumProc()==1) cout << "Norm2" << '\t';
00438   cout << MFLOPs << endl;
00439       }
00440       
00441       flopcounter.ResetFlops();
00442       timer.ResetStartTime();
00443       
00444       //10 dot's
00445       for( int i = 0; i < 10; ++i )
00446   q.Dot(z, resvec.Values());
00447       
00448       elapsed_time = timer.ElapsedTime();
00449       total_flops = q.Flops();
00450       MFLOPs = total_flops/elapsed_time/1000000.0;
00451       if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
00452       
00453       if (summary) {
00454   if (comm.NumProc()==1) cout << "DotProd" << '\t';
00455   cout << MFLOPs << endl;
00456       }
00457       
00458       flopcounter.ResetFlops();
00459       timer.ResetStartTime();
00460       
00461       //10 dot's
00462       for( int i = 0; i < 10; ++i )
00463   q.Update(1.0, z, 1.0, r, 0.0);
00464       
00465       elapsed_time = timer.ElapsedTime();
00466       total_flops = q.Flops();
00467       MFLOPs = total_flops/elapsed_time/1000000.0;
00468       if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
00469       
00470       if (summary) {
00471   if (comm.NumProc()==1) cout << "Update" << '\t';
00472   cout << MFLOPs << endl;
00473       }
00474     }
00475     }
00476   }
00477 #ifdef EPETRA_MPI
00478   MPI_Finalize() ;
00479 #endif
00480 
00481 return ierr ;
00482 }
00483 
00484 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00485 // 
00486 // nx      (In) - number of grid points in x direction
00487 // ny      (In) - number of grid points in y direction
00488 //   The total number of equations will be nx*ny ordered such that the x direction changes
00489 //   most rapidly: 
00490 //      First equation is at point (0,0)
00491 //      Second at                  (1,0)
00492 //       ...
00493 //      nx equation at             (nx-1,0)
00494 //      nx+1st equation at         (0,1)
00495 
00496 // numPoints (In) - number of points in finite difference stencil
00497 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00498 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00499 //   A standard 5-point finite difference stencil would be described as:
00500 //     numPoints = 5
00501 //     xoff = [-1, 1, 0,  0, 0]
00502 //     yoff = [ 0, 0, 0, -1, 1]
00503 
00504 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00505 
00506 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00507 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00508 // A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
00509 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00510 //                diagonal will be slightly diag dominant.
00511 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00512 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00513 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00514 
00515 // Note: Caller of this function is responsible for deleting all output objects.
00516 
00517 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00518       int * xoff, int * yoff,
00519       const Epetra_Comm  &comm, bool verbose, bool summary, 
00520       Epetra_Map *& map, 
00521       Epetra_CrsMatrix *& A, 
00522       Epetra_Vector *& b, 
00523       Epetra_Vector *& bt,
00524       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00525 
00526   Epetra_MultiVector * b1, * bt1, * xexact1;
00527   
00528   GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, 
00529          xoff, yoff, 1, comm, verbose, summary, 
00530          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00531 
00532   b = dynamic_cast<Epetra_Vector *>(b1);
00533   bt = dynamic_cast<Epetra_Vector *>(bt1);
00534   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00535 
00536   return;
00537 }
00538 
00539 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00540       int * xoff, int * yoff, int nrhs,
00541       const Epetra_Comm  &comm, bool verbose, bool summary,
00542       Epetra_Map *& map, 
00543       Epetra_CrsMatrix *& A, 
00544       Epetra_MultiVector *& b, 
00545       Epetra_MultiVector *& bt,
00546       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00547   
00548   Epetra_Time timer(comm);
00549   // Determine my global IDs
00550   int * myGlobalElements;
00551   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00552 
00553   int numMyEquations = numNodesX*numNodesY;
00554   
00555   map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00556   delete [] myGlobalElements;
00557 
00558   int numGlobalEquations = map->NumGlobalElements();
00559 
00560   int profile = 0; if (StaticProfile) profile = numPoints;
00561 
00562 #ifdef EPETRA_HAVE_STATICPROFILE
00563 
00564   if (MakeLocalOnly) 
00565     A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
00566   else
00567     A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
00568 
00569 #else
00570 
00571   if (MakeLocalOnly) 
00572     A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
00573   else
00574     A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
00575 
00576 #endif
00577 
00578   int * indices = new int[numPoints];
00579   double * values = new double[numPoints];
00580 
00581   double dnumPoints = (double) numPoints;
00582   int nx = numNodesX*numProcsX;
00583 
00584   for (int i=0; i<numMyEquations; i++) {
00585 
00586     int rowID = map->GID(i);
00587     int numIndices = 0;
00588 
00589     for (int j=0; j<numPoints; j++) {
00590       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00591       if (colID>-1 && colID<numGlobalEquations) {
00592   indices[numIndices] = colID;
00593   double value = - ((double) rand())/ ((double) RAND_MAX);
00594   if (colID==rowID)
00595     values[numIndices++] = dnumPoints - value; // Make diagonal dominant
00596   else
00597     values[numIndices++] = value;
00598       }
00599     }
00600     //cout << "Building row " << rowID << endl;
00601     A->InsertGlobalValues(rowID, numIndices, values, indices);
00602   }
00603 
00604   delete [] indices;
00605   delete [] values;
00606   double insertTime = timer.ElapsedTime();
00607   timer.ResetStartTime();
00608   A->FillComplete(false);
00609   double fillCompleteTime = timer.ElapsedTime();
00610 
00611   if (verbose)
00612     cout << "Time to insert matrix values = " << insertTime << endl
00613    << "Time to complete fill        = " << fillCompleteTime << endl;
00614   if (summary) {
00615     if (comm.NumProc()==1) cout << "InsertTime" << '\t';
00616     cout << insertTime << endl;
00617     if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
00618     cout << fillCompleteTime << endl;
00619   }
00620 
00621   if (nrhs<=1) {  
00622     b = new Epetra_Vector(*map);
00623     bt = new Epetra_Vector(*map);
00624     xexact = new Epetra_Vector(*map);
00625   }
00626   else {
00627     b = new Epetra_MultiVector(*map, nrhs);
00628     bt = new Epetra_MultiVector(*map, nrhs);
00629     xexact = new Epetra_MultiVector(*map, nrhs);
00630   }
00631 
00632   xexact->Random(); // Fill xexact with random values
00633 
00634   A->Multiply(false, *xexact, *b);
00635   A->Multiply(true, *xexact, *bt);
00636 
00637   return;
00638 }
00639 
00640 
00641 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00642 // 
00643 // nx      (In) - number of grid points in x direction
00644 // ny      (In) - number of grid points in y direction
00645 //   The total number of equations will be nx*ny ordered such that the x direction changes
00646 //   most rapidly: 
00647 //      First equation is at point (0,0)
00648 //      Second at                  (1,0)
00649 //       ...
00650 //      nx equation at             (nx-1,0)
00651 //      nx+1st equation at         (0,1)
00652 
00653 // numPoints (In) - number of points in finite difference stencil
00654 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00655 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00656 //   A standard 5-point finite difference stencil would be described as:
00657 //     numPoints = 5
00658 //     xoff = [-1, 1, 0,  0, 0]
00659 //     yoff = [ 0, 0, 0, -1, 1]
00660 
00661 // nsizes  (In) - Length of element size list used to create variable block map and matrix
00662 // sizes   (In) - integer list of element sizes of length nsizes
00663 //    The map associated with this VbrMatrix will be created by cycling through the sizes list.
00664 //    For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
00665 //    of 2, 4, 3, 2, 4, 3, ...
00666 
00667 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00668 
00669 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00670 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00671 // A      (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
00672 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00673 //                diagonal will be slightly diag dominant.
00674 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00675 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00676 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00677 
00678 // Note: Caller of this function is responsible for deleting all output objects.
00679 
00680 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00681       int * xoff, int * yoff,
00682       int nsizes, int * sizes,
00683       const Epetra_Comm  &comm, bool verbose, bool summary, 
00684       Epetra_BlockMap *& map, 
00685       Epetra_VbrMatrix *& A, 
00686       Epetra_Vector *& b, 
00687       Epetra_Vector *& bt,
00688       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00689   
00690   Epetra_MultiVector * b1, * bt1, * xexact1;
00691   
00692   GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00693          xoff, yoff, nsizes, sizes,
00694          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00695 
00696   b = dynamic_cast<Epetra_Vector *>(b1);
00697   bt = dynamic_cast<Epetra_Vector *>(bt1);
00698   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00699 
00700   return;
00701 }
00702 
00703 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00704       int * xoff, int * yoff, 
00705       int nsizes, int * sizes, int nrhs,
00706       const Epetra_Comm  &comm, bool verbose, bool summary, 
00707       Epetra_BlockMap *& map, 
00708       Epetra_VbrMatrix *& A, 
00709       Epetra_MultiVector *& b, 
00710       Epetra_MultiVector *& bt,
00711       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00712 
00713   int i, j;
00714 
00715   // Determine my global IDs
00716   int * myGlobalElements;
00717   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00718 
00719   int numMyElements = numNodesX*numNodesY;
00720   
00721   Epetra_Map ptMap(-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00722   delete [] myGlobalElements;
00723 
00724   int numGlobalEquations = ptMap.NumGlobalElements();
00725 
00726   Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
00727   for (i=0; i<numMyElements; i++) 
00728     elementSizes[i] = sizes[ptMap.GID(i)%nsizes]; // cycle through sizes array
00729 
00730   map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(),
00731           ptMap.IndexBase(), ptMap.Comm());
00732 
00733   int profile = 0; if (StaticProfile) profile = numPoints;
00734   
00735   if (MakeLocalOnly) 
00736     A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
00737   else
00738     A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
00739 
00740   int * indices = new int[numPoints];
00741 
00742   // This section of code creates a vector of random values that will be used to create
00743   // light-weight dense matrices to pass into the VbrMatrix construction process.
00744 
00745   int maxElementSize = 0;
00746   for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
00747 
00748   Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
00749   Epetra_Vector randvec(lmap);
00750   randvec.Random();
00751   randvec.Scale(-1.0); // Make value negative
00752   int nx = numNodesX*numProcsX;
00753 
00754 
00755   for (i=0; i<numMyElements; i++) {
00756     int rowID = map->GID(i);
00757     int numIndices = 0;
00758     int rowDim = sizes[rowID%nsizes];
00759     for (j=0; j<numPoints; j++) {
00760       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00761       if (colID>-1 && colID<numGlobalEquations)
00762   indices[numIndices++] = colID;
00763     }
00764       
00765     A->BeginInsertGlobalValues(rowID, numIndices, indices);
00766     
00767     for (j=0; j < numIndices; j++) {
00768       int colDim = sizes[indices[j]%nsizes];
00769       A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
00770     }
00771     A->EndSubmitEntries();
00772   }
00773 
00774   delete [] indices;
00775 
00776   A->FillComplete();
00777 
00778   // Compute the InvRowSums of the matrix rows
00779   Epetra_Vector invRowSums(A->RowMap());
00780   Epetra_Vector rowSums(A->RowMap());
00781   A->InvRowSums(invRowSums);
00782   rowSums.Reciprocal(invRowSums);
00783 
00784   // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
00785   int numBlockDiagonalEntries;
00786   int * rowColDims;
00787   int * diagoffsets = map->FirstPointInElementList();
00788   A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
00789   for (i=0; i< numBlockDiagonalEntries; i++) {
00790     double * diagVals;
00791     int diagLDA;
00792     A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
00793     int rowDim = map->ElementSize(i);
00794     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
00795   }
00796 
00797   if (nrhs<=1) {  
00798     b = new Epetra_Vector(*map);
00799     bt = new Epetra_Vector(*map);
00800     xexact = new Epetra_Vector(*map);
00801   }
00802   else {
00803     b = new Epetra_MultiVector(*map, nrhs);
00804     bt = new Epetra_MultiVector(*map, nrhs);
00805     xexact = new Epetra_MultiVector(*map, nrhs);
00806   }
00807 
00808   xexact->Random(); // Fill xexact with random values
00809 
00810   A->Multiply(false, *xexact, *b);
00811   A->Multiply(true, *xexact, *bt);
00812 
00813   return;
00814 }
00815 
00816 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00817             int myPID, int * & myGlobalElements) {
00818 
00819   myGlobalElements = new int[numNodesX*numNodesY];
00820   int myProcX = myPID%numProcsX;
00821   int myProcY = myPID/numProcsX;
00822   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
00823   for (int j=0; j<numNodesY; j++) {
00824     for (int i=0; i<numNodesX; i++) {
00825       myGlobalElements[j*numNodesX+i] = curGID+i;
00826     }
00827     curGID+=numNodesX*numProcsX;
00828   }
00829   //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
00830   
00831   return;
00832 }
00833 
00834 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00835         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00836 
00837   Epetra_MultiVector z(*b);
00838   Epetra_MultiVector r(*b);
00839   Epetra_SerialDenseVector resvec(b->NumVectors());
00840 
00841   //Timings
00842   Epetra_Flops flopcounter;
00843   A->SetFlopCounter(flopcounter);
00844   Epetra_Time timer(A->Comm());
00845   std::string statdyn =        "dynamic";
00846   if (StaticProfile) statdyn = "static ";
00847 
00848   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00849     
00850     bool TransA = (j==1 || j==3);
00851     std::string contig = "without";
00852     if (j>1) contig =    "with   ";
00853     
00854 #ifdef EPETRA_SHORT_PERFTEST
00855     int kstart = 1;
00856 #else
00857     int kstart = 0;
00858 #endif
00859     for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
00860       
00861       std::string oldnew = "old";
00862       if (k>0) oldnew =    "new";
00863 
00864       if (j==2) A->OptimizeStorage();
00865 
00866       flopcounter.ResetFlops();
00867       timer.ResetStartTime();
00868 
00869       if (k==0) {
00870   //10 matvecs
00871 #ifndef EPETRA_SHORT_PERFTEST
00872   for( int i = 0; i < 10; ++i )
00873     A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
00874 #endif
00875       }
00876       else {
00877   //10 matvecs
00878   for( int i = 0; i < 10; ++i )
00879     A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
00880       }
00881       
00882       double elapsed_time = timer.ElapsedTime();
00883       double total_flops = A->Flops();
00884 
00885       // Compute residual
00886       if (TransA)
00887   r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00888       else
00889   r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00890 
00891       r.Norm2(resvec.Values());
00892       
00893       if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00894       double MFLOPs = total_flops/elapsed_time/1000000.0;
00895       if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
00896       << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00897       if (summary) {
00898   if (A->Comm().NumProc()==1) {
00899     if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
00900     else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
00901   }
00902   cout << MFLOPs << endl;
00903       }
00904     }
00905   }
00906   return;
00907 }
00908 #ifdef EPETRA_HAVE_JADMATRIX
00909 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00910         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00911 
00912   Epetra_MultiVector z(*b);
00913   Epetra_MultiVector r(*b);
00914   Epetra_SerialDenseVector resvec(b->NumVectors());
00915 
00916   //Timings
00917   Epetra_Flops flopcounter;
00918   A->SetFlopCounter(flopcounter);
00919   Epetra_Time timer(A->Comm());
00920 
00921   for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
00922     
00923     bool TransA = (j==1);
00924     A->SetUseTranspose(TransA);
00925     flopcounter.ResetFlops();
00926     timer.ResetStartTime();
00927 
00928     //10 matvecs
00929     for( int i = 0; i < 10; ++i )
00930       A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
00931     
00932     double elapsed_time = timer.ElapsedTime();
00933     double total_flops = A->Flops();
00934     
00935     // Compute residual
00936     if (TransA)
00937       r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00938     else
00939       r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00940     
00941     r.Norm2(resvec.Values());
00942     
00943     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00944     double MFLOPs = total_flops/elapsed_time/1000000.0;
00945     if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
00946           << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00947     if (summary) {
00948       if (A->Comm().NumProc()==1) {
00949   if (TransA) cout << "TransMv" << '\t';
00950   else cout << "NoTransMv" << '\t';
00951       }
00952       cout << MFLOPs << endl;
00953     }
00954   }
00955   return;
00956 }
00957 #endif
00958 //=========================================================================================
00959 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00960           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00961           bool StaticProfile, bool verbose, bool summary) {
00962 
00963   if (L->NoDiagonal()) {
00964     bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
00965     btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
00966   }
00967   if (U->NoDiagonal()) {
00968     bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
00969     btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
00970   }
00971 
00972   Epetra_MultiVector z(*bL);
00973   Epetra_MultiVector r(*bL);
00974   Epetra_SerialDenseVector resvec(bL->NumVectors());
00975 
00976   //Timings
00977   Epetra_Flops flopcounter;
00978   L->SetFlopCounter(flopcounter);
00979   U->SetFlopCounter(flopcounter);
00980   Epetra_Time timer(L->Comm());
00981   std::string statdyn =        "dynamic";
00982   if (StaticProfile) statdyn = "static ";
00983 
00984   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00985     
00986     bool TransA = (j==1 || j==3);
00987     std::string contig = "without";
00988     if (j>1) contig =    "with   ";
00989     
00990     if (j==2) {
00991       L->OptimizeStorage();
00992       U->OptimizeStorage();
00993     }
00994 
00995     flopcounter.ResetFlops();
00996     timer.ResetStartTime();
00997     
00998     //10 lower solves
00999     bool Upper = false;
01000     bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
01001     Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
01002     for( int i = 0; i < 10; ++i )
01003       L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01004       
01005     double elapsed_time = timer.ElapsedTime();
01006     double total_flops = L->Flops();
01007 
01008     // Compute residual
01009     r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
01010     r.Norm2(resvec.Values());
01011 
01012     if (resvec.NormInf()>0.000001) {
01013       cout << "resvec = " << resvec << endl;
01014       cout << "z = " << z << endl;
01015       cout << "xexactL = " << *xexactL << endl;
01016       cout << "r = " << r << endl;
01017     }
01018       
01019     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01020     double MFLOPs = total_flops/elapsed_time/1000000.0;
01021     if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
01022           << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
01023     if (summary) {
01024       if (L->Comm().NumProc()==1) {
01025   if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01026   else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01027       }
01028       cout << MFLOPs << endl;
01029     }
01030     flopcounter.ResetFlops();
01031     timer.ResetStartTime();
01032     
01033     //10 upper solves
01034     Upper = true;
01035     UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
01036     b = TransA ? btU : bU;  // solve with the appropriate b vector
01037     for( int i = 0; i < 10; ++i )
01038       U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01039       
01040     elapsed_time = timer.ElapsedTime();
01041     total_flops = U->Flops();
01042 
01043     // Compute residual
01044     r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
01045     r.Norm2(resvec.Values());
01046 
01047     if (resvec.NormInf()>0.001) {
01048       cout << "U = " << *U << endl;
01049       //cout << "resvec = " << resvec << endl;
01050       cout << "z = " << z << endl;
01051       cout << "xexactU = " << *xexactU << endl;
01052       //cout << "r = " << r << endl;
01053       cout << "b = " << *b << endl;
01054     }
01055 
01056       
01057     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01058     MFLOPs = total_flops/elapsed_time/1000000.0;
01059     if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
01060           << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
01061     if (summary) {
01062       if (L->Comm().NumProc()==1) {
01063   if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01064   else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01065       }
01066       cout << MFLOPs << endl;
01067     }
01068   }
01069   return;
01070 }

Generated on Wed May 12 21:41:04 2010 for Epetra Package Browser (Single Doxygen Collection) by  doxygen 1.4.7