test/BasicPerfTest/cxx_main.cpp

Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Epetra: Linear Algebra Services Package 
00005 //                 Copyright (2001) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #define EPETRA_HAVE_JADMATRIX
00030 #define EPETRA_SHORT_PERFTEST
00031 #define EPETRA_HAVE_STATICPROFILE
00032 #include "Epetra_Map.h"
00033 #include "Epetra_LocalMap.h"
00034 #include "Epetra_BlockMap.h"
00035 #include "Epetra_Time.h"
00036 #include "Epetra_CrsMatrix.h"
00037 #include "Epetra_VbrMatrix.h"
00038 #include "Epetra_Vector.h"
00039 #include "Epetra_IntVector.h"
00040 #include "Epetra_MultiVector.h"
00041 #include "Epetra_IntSerialDenseVector.h"
00042 #include "Epetra_SerialDenseVector.h"
00043 #include "Epetra_Flops.h"
00044 #ifdef EPETRA_MPI
00045 #include "Epetra_MpiComm.h"
00046 #include "mpi.h"
00047 #else
00048 #include "Epetra_SerialComm.h"
00049 #endif
00050 #include "../epetra_test_err.h"
00051 #include "Epetra_Version.h"
00052 #ifdef EPETRA_HAVE_JADMATRIX
00053 #include "Epetra_JadMatrix.h"
00054 #endif
00055 
00056 // prototypes
00057 
00058 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00059       int * xoff, int * yoff,
00060       const Epetra_Comm  &comm, bool verbose, bool summary, 
00061       Epetra_Map *& map, 
00062       Epetra_CrsMatrix *& A, 
00063       Epetra_Vector *& b, 
00064       Epetra_Vector *& bt,
00065       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00066 
00067 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00068       int * xoff, int * yoff, int nrhs,
00069       const Epetra_Comm  &comm, bool verbose, bool summary, 
00070       Epetra_Map *& map, 
00071       Epetra_CrsMatrix *& A, 
00072       Epetra_MultiVector *& b, 
00073       Epetra_MultiVector *& bt,
00074       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00075  
00076 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00077       int * xoff, int * yoff,
00078       int nsizes, int * sizes,
00079       const Epetra_Comm  &comm, bool verbose, bool summary, 
00080       Epetra_BlockMap *& map, 
00081       Epetra_VbrMatrix *& A, 
00082       Epetra_Vector *& b, 
00083       Epetra_Vector *& bt,
00084       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00085 
00086 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00087       int * xoff, int * yoff, 
00088       int nsizes, int * sizes, int nrhs,
00089       const Epetra_Comm  &comm, bool verbose, bool summary, 
00090       Epetra_BlockMap *& map, 
00091       Epetra_VbrMatrix *& A, 
00092       Epetra_MultiVector *& b, 
00093       Epetra_MultiVector *& bt,
00094       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly);
00095 
00096 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00097             int myPID, int * & myGlobalElements);
00098 
00099 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00100         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00101 #ifdef EPETRA_HAVE_JADMATRIX
00102 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00103         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary);
00104 #endif
00105 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00106           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00107           bool StaticProfile, bool verbose, bool summary);
00108 int main(int argc, char *argv[])
00109 {
00110   int ierr = 0;
00111   double elapsed_time;
00112   double total_flops;
00113   double MFLOPs;
00114     
00115 
00116 #ifdef EPETRA_MPI
00117 
00118   // Initialize MPI
00119   MPI_Init(&argc,&argv);
00120   Epetra_MpiComm comm( MPI_COMM_WORLD );
00121 #else
00122   Epetra_SerialComm comm;
00123 #endif
00124 
00125   bool verbose = false;
00126   bool summary = false;
00127 
00128   // Check if we should print verbose results to standard out
00129   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true;
00130 
00131   // Check if we should print verbose results to standard out
00132   if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true;
00133 
00134   if(argc < 6) {
00135     cerr << "Usage: " << argv[0]
00136          << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl
00137          << "where:" << endl
00138          << "NumNodesX         - Number of mesh nodes in X direction per processor" << endl
00139          << "NumNodesY         - Number of mesh nodes in Y direction per processor" << endl
00140          << "NumProcX          - Number of processors to use in X direction" << endl
00141          << "NumProcY          - Number of processors to use in Y direction" << endl
00142          << "NumPoints         - Number of points to use in stencil (5, 9 or 25 only)" << endl
00143          << "-v|-s             - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl
00144          << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl
00145    << " Serial example:" << endl
00146          << argv[0] << " 16 12 1 1 25 -v" << endl
00147    << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl
00148    << " MPI example:" << endl
00149          << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl
00150    << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl
00151    << " in the X direction and 8 in the Y direction.  Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl
00152          << endl;
00153     return(1);
00154 
00155   }
00156     //char tmp;
00157     //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl;
00158     //if (comm.MyPID()==0) cin >> tmp;
00159     //comm.Barrier();
00160 
00161   comm.SetTracebackMode(0); // This should shut down any error traceback reporting
00162   if (verbose && comm.MyPID()==0)
00163     cout << Epetra_Version() << endl << endl;
00164   if (summary && comm.MyPID()==0) {
00165     if (comm.NumProc()==1)
00166       cout << Epetra_Version() << endl << endl;
00167     else
00168       cout << endl << endl; // Print two blank line to keep output columns lined up
00169   }
00170 
00171   if (verbose) cout << comm <<endl;
00172 
00173 
00174   // Redefine verbose to only print on PE 0
00175 
00176   if (verbose && comm.MyPID()!=0) verbose = false;
00177   if (summary && comm.MyPID()!=0) summary = false;
00178 
00179   int numNodesX = atoi(argv[1]);
00180   int numNodesY = atoi(argv[2]);
00181   int numProcsX = atoi(argv[3]);
00182   int numProcsY = atoi(argv[4]);
00183   int numPoints = atoi(argv[5]);
00184 
00185   if (verbose || (summary && comm.NumProc()==1)) {
00186     cout << " Number of local nodes in X direction  = " << numNodesX << endl
00187    << " Number of local nodes in Y direction  = " << numNodesY << endl
00188    << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl
00189    << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl
00190    << " Number of local nonzero entries       = " << numNodesX*numNodesY*numPoints << endl
00191    << " Number of global nonzero entries      = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl
00192    << " Number of Processors in X direction   = " << numProcsX << endl
00193    << " Number of Processors in Y direction   = " << numProcsY << endl
00194    << " Number of Points in stencil           = " << numPoints << endl << endl;
00195   }
00196   // Print blank line to keep output columns lined up
00197   if (summary && comm.NumProc()>1)
00198     cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl;
00199 
00200   if (numProcsX*numProcsY!=comm.NumProc()) {
00201     cerr << "Number of processors = " << comm.NumProc() << endl
00202    << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl;
00203     return(1);
00204   }
00205 
00206   if (numPoints!=5 && numPoints!=9 && numPoints!=25) {
00207     cerr << "Number of points specified = " << numPoints << endl
00208    << " is not 5, 9, 25" << endl << endl;
00209     return(1);
00210   }
00211 
00212   if (numNodesX*numNodesY<=0) {
00213     cerr << "Product of number of nodes is <= zero" << endl << endl;
00214     return(1);
00215   }
00216 
00217   Epetra_IntSerialDenseVector Xoff, XLoff, XUoff;
00218   Epetra_IntSerialDenseVector Yoff, YLoff, YUoff;
00219   if (numPoints==5) {
00220 
00221      // Generate a 5-point 2D Finite Difference matrix
00222     Xoff.Size(5);
00223     Yoff.Size(5);
00224     Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0;  Xoff[4] = 0; 
00225     Yoff[0] = 0;  Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; 
00226 
00227      // Generate a 2-point 2D Lower triangular Finite Difference matrix
00228     XLoff.Size(2);
00229     YLoff.Size(2);
00230     XLoff[0] = -1; XLoff[1] =  0; 
00231     YLoff[0] =  0; YLoff[1] = -1;
00232 
00233      // Generate a 3-point 2D upper triangular Finite Difference matrix
00234     XUoff.Size(3);
00235     YUoff.Size(3);
00236     XUoff[0] =  0; XUoff[1] =  1; XUoff[2] = 0; 
00237     YUoff[0] =  0; YUoff[1] =  0; YUoff[2] = 1;
00238   }
00239   else if (numPoints==9) {
00240     // Generate a 9-point 2D Finite Difference matrix
00241     Xoff.Size(9);
00242     Yoff.Size(9);
00243     Xoff[0] = -1;  Xoff[1] =  0; Xoff[2] =  1; 
00244     Yoff[0] = -1;  Yoff[1] = -1; Yoff[2] = -1; 
00245     Xoff[3] = -1;  Xoff[4] =  0; Xoff[5] =  1; 
00246     Yoff[3] =  0;  Yoff[4] =  0; Yoff[5] =  0; 
00247     Xoff[6] = -1;  Xoff[7] =  0; Xoff[8] =  1; 
00248     Yoff[6] =  1;  Yoff[7] =  1; Yoff[8] =  1; 
00249 
00250     // Generate a 5-point lower triangular 2D Finite Difference matrix
00251     XLoff.Size(5);
00252     YLoff.Size(5);
00253     XLoff[0] = -1;  XLoff[1] =  0; Xoff[2] =  1; 
00254     YLoff[0] = -1;  YLoff[1] = -1; Yoff[2] = -1; 
00255     XLoff[3] = -1;  XLoff[4] =  0; 
00256     YLoff[3] =  0;  YLoff[4] =  0;
00257 
00258     // Generate a 4-point upper triangular 2D Finite Difference matrix
00259     XUoff.Size(4);
00260     YUoff.Size(4);
00261     XUoff[0] =  1; 
00262     YUoff[0] =  0; 
00263     XUoff[1] = -1;  XUoff[2] =  0; XUoff[3] =  1; 
00264     YUoff[1] =  1;  YUoff[2] =  1; YUoff[3] =  1; 
00265 
00266   }
00267   else {
00268     // Generate a 25-point 2D Finite Difference matrix
00269     Xoff.Size(25);
00270     Yoff.Size(25);
00271     int xi = 0, yi = 0;
00272     int xo = -2, yo = -2;
00273     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00274     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00275     xo = -2, yo++;
00276     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00277     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00278     xo = -2, yo++;
00279     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00280     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00281     xo = -2, yo++;
00282     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00283     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00284     xo = -2, yo++;
00285     Xoff[xi++] = xo++;  Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++;
00286     Yoff[yi++] = yo  ;  Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; Yoff[yi++] = yo  ; 
00287 
00288     // Generate a 13-point lower triangular 2D Finite Difference matrix
00289     XLoff.Size(13);
00290     YLoff.Size(13);
00291     xi = 0, yi = 0;
00292     xo = -2, yo = -2;
00293     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00294     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00295     xo = -2, yo++;
00296     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00297     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; YLoff[yi++] = yo  ; 
00298     xo = -2, yo++;
00299     XLoff[xi++] = xo++;  XLoff[xi++] = xo++; XLoff[xi++] = xo++;
00300     YLoff[yi++] = yo  ;  YLoff[yi++] = yo  ; YLoff[yi++] = yo  ;
00301 
00302     // Generate a 13-point upper triangular 2D Finite Difference matrix
00303     XUoff.Size(13);
00304     YUoff.Size(13);
00305     xi = 0, yi = 0;
00306     xo = 0, yo = 0;
00307     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00308     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00309     xo = -2, yo++;
00310     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00311     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00312     xo = -2, yo++;
00313     XUoff[xi++] = xo++;  XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++;
00314     YUoff[yi++] = yo  ;  YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; YUoff[yi++] = yo  ; 
00315 
00316   }
00317 
00318   Epetra_Map * map;
00319   Epetra_Map * mapL;
00320   Epetra_Map * mapU;
00321   Epetra_CrsMatrix * A;
00322   Epetra_CrsMatrix * L;
00323   Epetra_CrsMatrix * U;
00324   Epetra_MultiVector * b;
00325   Epetra_MultiVector * bt;
00326   Epetra_MultiVector * xexact;
00327   Epetra_MultiVector * bL;
00328   Epetra_MultiVector * btL;
00329   Epetra_MultiVector * xexactL;
00330   Epetra_MultiVector * bU;
00331   Epetra_MultiVector * btU;
00332   Epetra_MultiVector * xexactU;
00333   Epetra_SerialDenseVector resvec(0);
00334 
00335   //Timings
00336   Epetra_Flops flopcounter;
00337   Epetra_Time timer(comm);
00338 
00339 #ifdef EPETRA_SHORT_PERFTEST
00340   int jstop = 1;
00341 #else
00342   int jstop = 2;
00343 #endif
00344   for (int j=0; j<jstop; j++) {
00345     for (int k=1; k<17; k++) {
00346 #ifdef EPETRA_SHORT_PERFTEST
00347       if (k<6 || k%4==0) {
00348 #else
00349       if (k<7 || k%2==0) {
00350 #endif
00351       int nrhs=k;
00352       if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with ";
00353 
00354       bool StaticProfile = (j==0);
00355       if (verbose) 
00356   if (StaticProfile) cout << " static profile\n";
00357   else cout << " dynamic profile\n";
00358       
00359       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00360        Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary,
00361        map, A, b, bt, xexact, StaticProfile, false);
00362 
00363       
00364 #ifdef EPETRA_HAVE_JADMATRIX
00365       
00366       timer.ResetStartTime();
00367       Epetra_JadMatrix JA(*A);
00368       elapsed_time = timer.ElapsedTime();
00369       if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl;
00370 
00371       //cout << "A = " << *A << endl;
00372       //cout << "JA = " << JA << endl;
00373 
00374       runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary);
00375 
00376 #endif
00377       runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary);
00378 
00379       delete A;
00380       delete b;
00381       delete bt; 
00382       delete xexact;
00383 
00384       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(),
00385        XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary,
00386        mapL, L, bL, btL, xexactL, StaticProfile, true);
00387       
00388 
00389       GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(),
00390        XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary,
00391        mapU, U, bU, btU, xexactU, StaticProfile, true);
00392       
00393 
00394       runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary);
00395 
00396       delete L;
00397       delete bL;
00398       delete btL; 
00399       delete xexactL;
00400       delete mapL;
00401 
00402       delete U;
00403       delete bU;
00404       delete btU; 
00405       delete xexactU;
00406       delete mapU;
00407 
00408       Epetra_MultiVector q(*map, nrhs);
00409       Epetra_MultiVector z(q);
00410       Epetra_MultiVector r(q);
00411       
00412       delete map;
00413       q.SetFlopCounter(flopcounter);
00414       z.SetFlopCounter(q);
00415       r.SetFlopCounter(q);
00416 
00417       resvec.Resize(nrhs);
00418       
00419     
00420       flopcounter.ResetFlops();
00421       timer.ResetStartTime();
00422 
00423       //10 norms
00424       for( int i = 0; i < 10; ++i )
00425   q.Norm2( resvec.Values() );
00426 
00427       elapsed_time = timer.ElapsedTime();
00428       total_flops = q.Flops();
00429       MFLOPs = total_flops/elapsed_time/1000000.0;
00430       if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl;
00431       
00432       if (summary) {
00433   if (comm.NumProc()==1) cout << "Norm2" << '\t';
00434   cout << MFLOPs << endl;
00435       }
00436       
00437       flopcounter.ResetFlops();
00438       timer.ResetStartTime();
00439       
00440       //10 dot's
00441       for( int i = 0; i < 10; ++i )
00442   q.Dot(z, resvec.Values());
00443       
00444       elapsed_time = timer.ElapsedTime();
00445       total_flops = q.Flops();
00446       MFLOPs = total_flops/elapsed_time/1000000.0;
00447       if (verbose) cout << "Total MFLOPs for 10 Dot's  = " << MFLOPs << endl;
00448       
00449       if (summary) {
00450   if (comm.NumProc()==1) cout << "DotProd" << '\t';
00451   cout << MFLOPs << endl;
00452       }
00453       
00454       flopcounter.ResetFlops();
00455       timer.ResetStartTime();
00456       
00457       //10 dot's
00458       for( int i = 0; i < 10; ++i )
00459   q.Update(1.0, z, 1.0, r, 0.0);
00460       
00461       elapsed_time = timer.ElapsedTime();
00462       total_flops = q.Flops();
00463       MFLOPs = total_flops/elapsed_time/1000000.0;
00464       if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl;
00465       
00466       if (summary) {
00467   if (comm.NumProc()==1) cout << "Update" << '\t';
00468   cout << MFLOPs << endl;
00469       }
00470     }
00471     }
00472   }
00473 #ifdef EPETRA_MPI
00474   MPI_Finalize() ;
00475 #endif
00476 
00477 return ierr ;
00478 }
00479 
00480 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00481 // 
00482 // nx      (In) - number of grid points in x direction
00483 // ny      (In) - number of grid points in y direction
00484 //   The total number of equations will be nx*ny ordered such that the x direction changes
00485 //   most rapidly: 
00486 //      First equation is at point (0,0)
00487 //      Second at                  (1,0)
00488 //       ...
00489 //      nx equation at             (nx-1,0)
00490 //      nx+1st equation at         (0,1)
00491 
00492 // numPoints (In) - number of points in finite difference stencil
00493 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00494 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00495 //   A standard 5-point finite difference stencil would be described as:
00496 //     numPoints = 5
00497 //     xoff = [-1, 1, 0,  0, 0]
00498 //     yoff = [ 0, 0, 0, -1, 1]
00499 
00500 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00501 
00502 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00503 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00504 // A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
00505 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00506 //                diagonal will be slightly diag dominant.
00507 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00508 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00509 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00510 
00511 // Note: Caller of this function is responsible for deleting all output objects.
00512 
00513 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00514       int * xoff, int * yoff,
00515       const Epetra_Comm  &comm, bool verbose, bool summary, 
00516       Epetra_Map *& map, 
00517       Epetra_CrsMatrix *& A, 
00518       Epetra_Vector *& b, 
00519       Epetra_Vector *& bt,
00520       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00521 
00522   Epetra_MultiVector * b1, * bt1, * xexact1;
00523   
00524   GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, 
00525          xoff, yoff, 1, comm, verbose, summary, 
00526          map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00527 
00528   b = dynamic_cast<Epetra_Vector *>(b1);
00529   bt = dynamic_cast<Epetra_Vector *>(bt1);
00530   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00531 
00532   return;
00533 }
00534 
00535 void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00536       int * xoff, int * yoff, int nrhs,
00537       const Epetra_Comm  &comm, bool verbose, bool summary,
00538       Epetra_Map *& map, 
00539       Epetra_CrsMatrix *& A, 
00540       Epetra_MultiVector *& b, 
00541       Epetra_MultiVector *& bt,
00542       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00543   
00544   Epetra_Time timer(comm);
00545   // Determine my global IDs
00546   int * myGlobalElements;
00547   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00548 
00549   int numMyEquations = numNodesX*numNodesY;
00550   
00551   map = new Epetra_Map(-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00552   delete [] myGlobalElements;
00553 
00554   int numGlobalEquations = map->NumGlobalElements();
00555 
00556   int profile = 0; if (StaticProfile) profile = numPoints;
00557 
00558 #ifdef EPETRA_HAVE_STATICPROFILE
00559 
00560   if (MakeLocalOnly) 
00561     A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap
00562   else
00563     A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix
00564 
00565 #else
00566 
00567   if (MakeLocalOnly) 
00568     A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap
00569   else
00570     A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix
00571 
00572 #endif
00573 
00574   int * indices = new int[numPoints];
00575   double * values = new double[numPoints];
00576 
00577   double dnumPoints = (double) numPoints;
00578   int nx = numNodesX*numProcsX;
00579 
00580   for (int i=0; i<numMyEquations; i++) {
00581 
00582     int rowID = map->GID(i);
00583     int numIndices = 0;
00584 
00585     for (int j=0; j<numPoints; j++) {
00586       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00587       if (colID>-1 && colID<numGlobalEquations) {
00588   indices[numIndices] = colID;
00589   double value = - ((double) rand())/ ((double) RAND_MAX);
00590   if (colID==rowID)
00591     values[numIndices++] = dnumPoints - value; // Make diagonal dominant
00592   else
00593     values[numIndices++] = value;
00594       }
00595     }
00596     //cout << "Building row " << rowID << endl;
00597     A->InsertGlobalValues(rowID, numIndices, values, indices);
00598   }
00599 
00600   delete [] indices;
00601   delete [] values;
00602   double insertTime = timer.ElapsedTime();
00603   timer.ResetStartTime();
00604   A->FillComplete();
00605   double fillCompleteTime = timer.ElapsedTime();
00606 
00607   if (verbose)
00608     cout << "Time to insert matrix values = " << insertTime << endl
00609    << "Time to complete fill        = " << fillCompleteTime << endl;
00610   if (summary) {
00611     if (comm.NumProc()==1) cout << "InsertTime" << '\t';
00612     cout << insertTime << endl;
00613     if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t';
00614     cout << fillCompleteTime << endl;
00615   }
00616 
00617   if (nrhs<=1) {  
00618     b = new Epetra_Vector(*map);
00619     bt = new Epetra_Vector(*map);
00620     xexact = new Epetra_Vector(*map);
00621   }
00622   else {
00623     b = new Epetra_MultiVector(*map, nrhs);
00624     bt = new Epetra_MultiVector(*map, nrhs);
00625     xexact = new Epetra_MultiVector(*map, nrhs);
00626   }
00627 
00628   xexact->Random(); // Fill xexact with random values
00629 
00630   A->Multiply(false, *xexact, *b);
00631   A->Multiply(true, *xexact, *bt);
00632 
00633   return;
00634 }
00635 
00636 
00637 // Constructs a 2D PDE finite difference matrix using the list of x and y offsets.
00638 // 
00639 // nx      (In) - number of grid points in x direction
00640 // ny      (In) - number of grid points in y direction
00641 //   The total number of equations will be nx*ny ordered such that the x direction changes
00642 //   most rapidly: 
00643 //      First equation is at point (0,0)
00644 //      Second at                  (1,0)
00645 //       ...
00646 //      nx equation at             (nx-1,0)
00647 //      nx+1st equation at         (0,1)
00648 
00649 // numPoints (In) - number of points in finite difference stencil
00650 // xoff    (In) - stencil offsets in x direction (of length numPoints)
00651 // yoff    (In) - stencil offsets in y direction (of length numPoints)
00652 //   A standard 5-point finite difference stencil would be described as:
00653 //     numPoints = 5
00654 //     xoff = [-1, 1, 0,  0, 0]
00655 //     yoff = [ 0, 0, 0, -1, 1]
00656 
00657 // nsizes  (In) - Length of element size list used to create variable block map and matrix
00658 // sizes   (In) - integer list of element sizes of length nsizes
00659 //    The map associated with this VbrMatrix will be created by cycling through the sizes list.
00660 //    For example, if nsize = 3 and sizes = [ 2, 4, 3], the block map will have elementsizes
00661 //    of 2, 4, 3, 2, 4, 3, ...
00662 
00663 // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed
00664 
00665 // comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
00666 // map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
00667 // A      (Out) - Epetra_VbrMatrix constructed for nx by ny grid using prescribed stencil
00668 //                Off-diagonal values are random between 0 and 1.  If diagonal is part of stencil,
00669 //                diagonal will be slightly diag dominant.
00670 // b      (Out) - Generated RHS.  Values satisfy b = A*xexact
00671 // bt     (Out) - Generated RHS.  Values satisfy b = A'*xexact
00672 // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact
00673 
00674 // Note: Caller of this function is responsible for deleting all output objects.
00675 
00676 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00677       int * xoff, int * yoff,
00678       int nsizes, int * sizes,
00679       const Epetra_Comm  &comm, bool verbose, bool summary, 
00680       Epetra_BlockMap *& map, 
00681       Epetra_VbrMatrix *& A, 
00682       Epetra_Vector *& b, 
00683       Epetra_Vector *& bt,
00684       Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00685   
00686   Epetra_MultiVector * b1, * bt1, * xexact1;
00687   
00688   GenerateVbrProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints,
00689          xoff, yoff, nsizes, sizes,
00690          1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly);
00691 
00692   b = dynamic_cast<Epetra_Vector *>(b1);
00693   bt = dynamic_cast<Epetra_Vector *>(bt1);
00694   xexact = dynamic_cast<Epetra_Vector *>(xexact1);
00695 
00696   return;
00697 }
00698 
00699 void GenerateVbrProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, 
00700       int * xoff, int * yoff, 
00701       int nsizes, int * sizes, int nrhs,
00702       const Epetra_Comm  &comm, bool verbose, bool summary, 
00703       Epetra_BlockMap *& map, 
00704       Epetra_VbrMatrix *& A, 
00705       Epetra_MultiVector *& b, 
00706       Epetra_MultiVector *& bt,
00707       Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) {
00708 
00709   int i, j;
00710 
00711   // Determine my global IDs
00712   int * myGlobalElements;
00713   GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements);
00714 
00715   int numMyElements = numNodesX*numNodesY;
00716   
00717   Epetra_Map ptMap(-1, numMyElements, myGlobalElements, 0, comm); // Create map with 2D block partitioning.
00718   delete [] myGlobalElements;
00719 
00720   int numGlobalEquations = ptMap.NumGlobalElements();
00721 
00722   Epetra_IntVector elementSizes(ptMap); // This vector will have the list of element sizes
00723   for (i=0; i<numMyElements; i++) 
00724     elementSizes[i] = sizes[ptMap.GID(i)%nsizes]; // cycle through sizes array
00725 
00726   map = new Epetra_BlockMap(-1, numMyElements, ptMap.MyGlobalElements(), elementSizes.Values(),
00727           ptMap.IndexBase(), ptMap.Comm());
00728 
00729   int profile = 0; if (StaticProfile) profile = numPoints;
00730   
00731   if (MakeLocalOnly) 
00732     A = new Epetra_VbrMatrix(Copy, *map, *map, profile); // Construct matrix rowmap=colmap
00733   else
00734     A = new Epetra_VbrMatrix(Copy, *map, profile); // Construct matrix
00735 
00736   int * indices = new int[numPoints];
00737 
00738   // This section of code creates a vector of random values that will be used to create
00739   // light-weight dense matrices to pass into the VbrMatrix construction process.
00740 
00741   int maxElementSize = 0;
00742   for (i=0; i< nsizes; i++) maxElementSize = EPETRA_MAX(maxElementSize, sizes[i]);
00743 
00744   Epetra_LocalMap lmap(maxElementSize*maxElementSize, ptMap.IndexBase(), ptMap.Comm());
00745   Epetra_Vector randvec(lmap);
00746   randvec.Random();
00747   randvec.Scale(-1.0); // Make value negative
00748   int nx = numNodesX*numProcsX;
00749 
00750 
00751   for (i=0; i<numMyElements; i++) {
00752     int rowID = map->GID(i);
00753     int numIndices = 0;
00754     int rowDim = sizes[rowID%nsizes];
00755     for (j=0; j<numPoints; j++) {
00756       int colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets
00757       if (colID>-1 && colID<numGlobalEquations)
00758   indices[numIndices++] = colID;
00759     }
00760       
00761     A->BeginInsertGlobalValues(rowID, numIndices, indices);
00762     
00763     for (j=0; j < numIndices; j++) {
00764       int colDim = sizes[indices[j]%nsizes];
00765       A->SubmitBlockEntry(&(randvec[0]), rowDim, rowDim, colDim);
00766     }
00767     A->EndSubmitEntries();
00768   }
00769 
00770   delete [] indices;
00771 
00772   A->FillComplete();
00773 
00774   // Compute the InvRowSums of the matrix rows
00775   Epetra_Vector invRowSums(A->RowMap());
00776   Epetra_Vector rowSums(A->RowMap());
00777   A->InvRowSums(invRowSums);
00778   rowSums.Reciprocal(invRowSums);
00779 
00780   // Jam the row sum values into the diagonal of the Vbr matrix (to make it diag dominant)
00781   int numBlockDiagonalEntries;
00782   int * rowColDims;
00783   int * diagoffsets = map->FirstPointInElementList();
00784   A->BeginExtractBlockDiagonalView(numBlockDiagonalEntries, rowColDims);
00785   for (i=0; i< numBlockDiagonalEntries; i++) {
00786     double * diagVals;
00787     int diagLDA;
00788     A->ExtractBlockDiagonalEntryView(diagVals, diagLDA);
00789     int rowDim = map->ElementSize(i);
00790     for (j=0; j<rowDim; j++) diagVals[j+j*diagLDA] = rowSums[diagoffsets[i]+j];
00791   }
00792 
00793   if (nrhs<=1) {  
00794     b = new Epetra_Vector(*map);
00795     bt = new Epetra_Vector(*map);
00796     xexact = new Epetra_Vector(*map);
00797   }
00798   else {
00799     b = new Epetra_MultiVector(*map, nrhs);
00800     bt = new Epetra_MultiVector(*map, nrhs);
00801     xexact = new Epetra_MultiVector(*map, nrhs);
00802   }
00803 
00804   xexact->Random(); // Fill xexact with random values
00805 
00806   A->Multiply(false, *xexact, *b);
00807   A->Multiply(true, *xexact, *bt);
00808 
00809   return;
00810 }
00811 
00812 void GenerateMyGlobalElements(int numNodesX, int numNodesY, int numProcsX, int numProcs,
00813             int myPID, int * & myGlobalElements) {
00814 
00815   myGlobalElements = new int[numNodesX*numNodesY];
00816   int myProcX = myPID%numProcsX;
00817   int myProcY = myPID/numProcsX;
00818   int curGID = myProcY*(numProcsX*numNodesX)*numNodesY+myProcX*numNodesX;
00819   for (int j=0; j<numNodesY; j++) {
00820     for (int i=0; i<numNodesX; i++) {
00821       myGlobalElements[j*numNodesX+i] = curGID+i;
00822     }
00823     curGID+=numNodesX*numProcsX;
00824   }
00825   //for (int i=0; i<numNodesX*numNodesY; i++) cout << "MYPID " << myPID <<" GID "<< myGlobalElements[i] << endl;
00826   
00827   return;
00828 }
00829 
00830 void runMatrixTests(Epetra_CrsMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00831         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00832 
00833   Epetra_MultiVector z(*b);
00834   Epetra_MultiVector r(*b);
00835   Epetra_SerialDenseVector resvec(b->NumVectors());
00836 
00837   //Timings
00838   Epetra_Flops flopcounter;
00839   A->SetFlopCounter(flopcounter);
00840   Epetra_Time timer(A->Comm());
00841   std::string statdyn =        "dynamic";
00842   if (StaticProfile) statdyn = "static ";
00843 
00844   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00845     
00846     bool TransA = (j==1 || j==3);
00847     std::string contig = "without";
00848     if (j>1) contig =    "with   ";
00849     
00850 #ifdef EPETRA_SHORT_PERFTEST
00851     int kstart = 1;
00852 #else
00853     int kstart = 0;
00854 #endif
00855     for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply
00856       
00857       std::string oldnew = "old";
00858       if (k>0) oldnew =    "new";
00859 
00860       if (j==2) A->OptimizeStorage();
00861 
00862       flopcounter.ResetFlops();
00863       timer.ResetStartTime();
00864 
00865       if (k==0) {
00866   //10 matvecs
00867 #ifndef EPETRA_SHORT_PERFTEST
00868   for( int i = 0; i < 10; ++i )
00869     A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method
00870 #endif
00871       }
00872       else {
00873   //10 matvecs
00874   for( int i = 0; i < 10; ++i )
00875     A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact
00876       }
00877       
00878       double elapsed_time = timer.ElapsedTime();
00879       double total_flops = A->Flops();
00880 
00881       // Compute residual
00882       if (TransA)
00883   r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00884       else
00885   r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00886 
00887       r.Norm2(resvec.Values());
00888       
00889       if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00890       double MFLOPs = total_flops/elapsed_time/1000000.0;
00891       if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA
00892       << ")  and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00893       if (summary) {
00894   if (A->Comm().NumProc()==1) {
00895     if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
00896     else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t';
00897   }
00898   cout << MFLOPs << endl;
00899       }
00900     }
00901   }
00902   return;
00903 }
00904 #ifdef EPETRA_HAVE_JADMATRIX
00905 void runJadMatrixTests(Epetra_JadMatrix * A,  Epetra_MultiVector * b, Epetra_MultiVector * bt,
00906         Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) {
00907 
00908   Epetra_MultiVector z(*b);
00909   Epetra_MultiVector r(*b);
00910   Epetra_SerialDenseVector resvec(b->NumVectors());
00911 
00912   //Timings
00913   Epetra_Flops flopcounter;
00914   A->SetFlopCounter(flopcounter);
00915   Epetra_Time timer(A->Comm());
00916 
00917   for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans
00918     
00919     bool TransA = (j==1);
00920     A->SetUseTranspose(TransA);
00921     flopcounter.ResetFlops();
00922     timer.ResetStartTime();
00923 
00924     //10 matvecs
00925     for( int i = 0; i < 10; ++i )
00926       A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact
00927     
00928     double elapsed_time = timer.ElapsedTime();
00929     double total_flops = A->Flops();
00930     
00931     // Compute residual
00932     if (TransA)
00933       r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z
00934     else
00935       r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z
00936     
00937     r.Norm2(resvec.Values());
00938     
00939     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
00940     double MFLOPs = total_flops/elapsed_time/1000000.0;
00941     if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA
00942           << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
00943     if (summary) {
00944       if (A->Comm().NumProc()==1) {
00945   if (TransA) cout << "TransMv" << '\t';
00946   else cout << "NoTransMv" << '\t';
00947       }
00948       cout << MFLOPs << endl;
00949     }
00950   }
00951   return;
00952 }
00953 #endif
00954 //=========================================================================================
00955 void runLUMatrixTests(Epetra_CrsMatrix * L,  Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, 
00956           Epetra_CrsMatrix * U,  Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, 
00957           bool StaticProfile, bool verbose, bool summary) {
00958 
00959   if (L->NoDiagonal()) {
00960     bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL
00961     btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL
00962   }
00963   if (U->NoDiagonal()) {
00964     bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU
00965     btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU
00966   }
00967 
00968   Epetra_MultiVector z(*bL);
00969   Epetra_MultiVector r(*bL);
00970   Epetra_SerialDenseVector resvec(bL->NumVectors());
00971 
00972   //Timings
00973   Epetra_Flops flopcounter;
00974   L->SetFlopCounter(flopcounter);
00975   U->SetFlopCounter(flopcounter);
00976   Epetra_Time timer(L->Comm());
00977   std::string statdyn =        "dynamic";
00978   if (StaticProfile) statdyn = "static ";
00979 
00980   for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans
00981     
00982     bool TransA = (j==1 || j==3);
00983     std::string contig = "without";
00984     if (j>1) contig =    "with   ";
00985     
00986     if (j==2) {
00987       L->OptimizeStorage();
00988       U->OptimizeStorage();
00989     }
00990 
00991     flopcounter.ResetFlops();
00992     timer.ResetStartTime();
00993     
00994     //10 lower solves
00995     bool Upper = false;
00996     bool UnitDiagonal = L->NoDiagonal();  // If no diagonal, then unit must be used
00997     Epetra_MultiVector * b = TransA ? btL : bL;  // solve with the appropriate b vector
00998     for( int i = 0; i < 10; ++i )
00999       L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01000       
01001     double elapsed_time = timer.ElapsedTime();
01002     double total_flops = L->Flops();
01003 
01004     // Compute residual
01005     r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z
01006     r.Norm2(resvec.Values());
01007 
01008     if (resvec.NormInf()>0.000001) {
01009       cout << "resvec = " << resvec << endl;
01010       cout << "z = " << z << endl;
01011       cout << "xexactL = " << *xexactL << endl;
01012       cout << "r = " << r << endl;
01013     }
01014       
01015     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01016     double MFLOPs = total_flops/elapsed_time/1000000.0;
01017     if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA
01018           << ")  and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl;
01019     if (summary) {
01020       if (L->Comm().NumProc()==1) {
01021   if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01022   else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01023       }
01024       cout << MFLOPs << endl;
01025     }
01026     flopcounter.ResetFlops();
01027     timer.ResetStartTime();
01028     
01029     //10 upper solves
01030     Upper = true;
01031     UnitDiagonal = U->NoDiagonal();  // If no diagonal, then unit must be used
01032     b = TransA ? btU : bU;  // solve with the appropriate b vector
01033     for( int i = 0; i < 10; ++i )
01034       U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt
01035       
01036     elapsed_time = timer.ElapsedTime();
01037     total_flops = U->Flops();
01038 
01039     // Compute residual
01040     r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z
01041     r.Norm2(resvec.Values());
01042 
01043     if (resvec.NormInf()>0.001) {
01044       cout << "U = " << *U << endl;
01045       //cout << "resvec = " << resvec << endl;
01046       cout << "z = " << z << endl;
01047       cout << "xexactU = " << *xexactU << endl;
01048       //cout << "r = " << r << endl;
01049       cout << "b = " << *b << endl;
01050     }
01051 
01052       
01053     if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": ";
01054     MFLOPs = total_flops/elapsed_time/1000000.0;
01055     if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA
01056           << ")  and " << contig << " opt storage = " << MFLOPs <<endl;
01057     if (summary) {
01058       if (L->Comm().NumProc()==1) {
01059   if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t';
01060   else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t';
01061       }
01062       cout << MFLOPs << endl;
01063     }
01064   }
01065   return;
01066 }

Generated on Thu Sep 18 12:37:56 2008 for Epetra Package Browser (Single Doxygen Collection) by doxygen 1.3.9.1