cc_main.cc

Go to the documentation of this file.
00001 /*@HEADER
00002 // ***********************************************************************
00003 // 
00004 //       Ifpack: Object-Oriented Algebraic Preconditioner Package
00005 //                 Copyright (2002) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 //@HEADER
00028 */
00029 
00030 #include <stdio.h>
00031 #include <stdlib.h>
00032 #include <ctype.h>
00033 #include <assert.h>
00034 #include <string.h>
00035 #include <math.h>
00036 #ifdef PETRA_MPI
00037 #include "mpi.h"
00038 #endif
00039 #include "prototypes.h"
00040 #include "paz_aztec.h"
00041 #ifndef __cplusplus
00042 #define __cplusplus
00043 #endif
00044 #include "Petra_Comm.h"
00045 #include "Petra_Map.h"
00046 #include "Petra_Time.h"
00047 #include "Petra_BlockMap.h"
00048 #include "Petra_RDP_MultiVector.h"
00049 #include "Petra_RDP_Vector.h"
00050 #include "Petra_RDP_DVBR_Matrix.h"
00051 #include "Petra_RDP_CRS_Matrix.h"
00052 #include "Ifpack_ILUK_Graph.h"
00053 #include "Ifpack_RDP_CRS_RILUK.h"
00054 
00055 #define perror(str) { fprintf(stderr,"%s\n",str);   exit(-1); }
00056 #define perror1(str,ierr) { fprintf(stderr,"%s %d\n",str,ierr);   exit(-1); }
00057 #define double_quote '"'
00058 void BiCGSTAB(Petra_RDP_CRS_Matrix &A, 
00059         Petra_RDP_Vector &x, 
00060         Petra_RDP_Vector &b, 
00061         Ifpack_RDP_CRS_RILUK *M, 
00062         int Maxiter, 
00063         double Tolerance, 
00064         double *residual, double & FLOPS, bool verbose);
00065 
00066 int main(int argc, char *argv[])
00067 {
00068   int    *update;                  /* vector elements updated on this node. */
00069   int    *indx;   /* MSR format of real and imag parts */
00070   int    *bindx;
00071   int    *bpntr;
00072   int    *rpntr;
00073   int    *cpntr;
00074   int    indexBase = 0;
00075   double *val;
00076   double *xguess, *b, *bt, *xexact, *xsolve;
00077   int    n_nonzeros, n_blk_nonzeros, ierr;
00078   int    N_update;           /* # of block unknowns updated on this node    */
00079   int    numLocalEquations;
00080                                  /* Number scalar equations on this node */
00081   int    numGlobalEquations, numGlobalBlocks; /* Total number of equations */
00082   int    numLocalBlocks;
00083   int    *blockSizes, *numNzBlks, *blkColInds;
00084   int    *numNz, *ColInds;
00085   int    N_external, N_blk_eqns;
00086   int    blk_row, *blk_col_inds;
00087   int    row,     *col_inds, numEntries;
00088   double *row_vals;
00089 
00090   double *val_msr;
00091   int *bindx_msr;
00092   
00093   double norm, d ;
00094 
00095   int matrix_type;
00096 
00097   int has_global_indices, option;
00098   int i, j, m, mp;
00099   int ione = 1;
00100   int *proc_config = new int[PAZ_PROC_SIZE];
00101 
00102   double time ;
00103 #ifdef PETRA_MPI
00104   MPI_Init(&argc,&argv);
00105 #endif
00106 
00107   /* get number of processors and the name of this processor */
00108  
00109 #ifdef PETRA_MPI
00110   Petra_Comm& Comm = *new Petra_Comm(MPI_COMM_WORLD);
00111 #else
00112   Petra_Comm& Comm = *new Petra_Comm();
00113 #endif
00114 
00115   printf("proc %d of %d is alive\n",
00116       Comm.MyPID(),Comm.NumProc());
00117 
00118   int MyPID = Comm.MyPID();
00119 
00120   bool verbose = false;
00121   if (MyPID==0) verbose = true;
00122 
00123   
00124 /* Still need Aztec proc info for the HB routines, can switch to Petra later */
00125 
00126 #ifdef PETRA_MPI
00127   PAZ_set_proc_config(proc_config,MPI_COMM_WORLD);
00128 #else
00129   PAZ_set_proc_config(proc_config,0);
00130 #endif
00131 
00132   Comm.Barrier();
00133 
00134 #ifdef VBRMATRIX
00135   if(argc != 6) 
00136     perror("error: enter name of data and partition file on command line, followed by levelfill and shift value") ; 
00137 #else
00138   if(argc != 5) perror("error: enter name of data file on command line, followed by levelfill and shift value") ; 
00139 #endif
00140   /* Set exact solution to NULL */
00141   xexact = NULL;
00142 
00143   /* Read matrix file and distribute among processors.  
00144      Returns with this processor's set of rows */ 
00145 
00146 #ifdef VBRMATRIX
00147   if (Comm.MyPID()==0) 
00148     read_hb(argv[1], &numGlobalEquations, &n_nonzeros, 
00149       &val_msr,  &bindx_msr, &xguess, &b, &bt, &xexact);
00150   
00151   create_vbr(argv[2], proc_config, &numGlobalEquations, &numGlobalBlocks,
00152        &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
00153        bindx_msr, val_msr, &val, &indx, 
00154        &rpntr, &cpntr, &bpntr, &bindx);
00155 
00156   if(proc_config[PAZ_node] == 0) 
00157     {
00158       free ((void *) val_msr);
00159       free ((void *) bindx_msr);
00160       free ((void *) cpntr);
00161     }
00162     matrix_type = PAZ_VBR_MATRIX;
00163 
00164   Comm.Barrier();
00165 
00166   distrib_vbr_matrix( proc_config, numGlobalEquations, numGlobalBlocks, 
00167           &n_nonzeros, &n_blk_nonzeros,
00168           &N_update, &update, 
00169           &val, &indx, &rpntr, &cpntr, &bpntr, &bindx, 
00170           &xguess, &b, &bt, &xexact);
00171 
00172 #else
00173   if (Comm.MyPID()==0) 
00174     read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
00175              &val,  &bindx, &xguess, &b, &bt, &xexact);
00176 
00177   Comm.Barrier();
00178 
00179   distrib_msr_matrix(proc_config, &numGlobalEquations, &n_nonzeros, &N_update,
00180       &update, &val, &bindx, &xguess, &b, &bt, &xexact);
00181 
00182 #ifdef DEBUG
00183   for (i = 0; i<N_update; i++)
00184     if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i);
00185 #endif
00186     matrix_type = PAZ_MSR_MATRIX;
00187 #endif
00188 
00189 #ifdef VBRMATRIX
00190   numLocalEquations = rpntr[N_update];
00191 #else
00192   numLocalEquations = N_update;
00193 #endif
00194 
00195 #ifdef VBRMATRIX
00196 
00197   /********  Make integer data structures needed for this interface *********/
00198 
00199   /* Make blockSizes - number of equations in each block row on this proc */
00200 
00201   numLocalBlocks = N_update;
00202 
00203   blockSizes = new int[numLocalBlocks];
00204   for (i=0; i<numLocalBlocks; i++)
00205     blockSizes[i] = rpntr[i+1]-rpntr[i];
00206 
00207   /* Make numNzBlks - number of block entries in each block row */
00208 
00209   numNzBlks = new int[numLocalBlocks];
00210   for (i=0; i<numLocalBlocks; i++)
00211     numNzBlks[i] = bpntr[i+1] - bpntr[i];
00212 
00213   /* Make blkColInds - Exactly bindx (just copy pointer) */
00214   blkColInds = bindx;
00215   
00216 
00217   Petra_BlockMap& map = *new Petra_BlockMap(numGlobalEquations, numLocalEquations, 
00218       update, indexBase, Comm, numGlobalBlocks, numLocalBlocks,
00219       blockSizes);
00220 
00221   Petra_RDP_DVBR_Matrix& A = *new Petra_RDP_DVBR_Matrix(map);
00222   
00223   if ((ierr=A.allocate(numNzBlks, blkColInds)))
00224      perror1("Error in DVBR_Matrix_allocate:",ierr);
00225  
00226   /* Add block rows one-at-a-time */
00227 
00228   for (blk_row=0; blk_row<numLocalBlocks; blk_row++)
00229     {
00230       row_vals = val + indx[bpntr[blk_row]];
00231       blk_col_inds = bindx + bpntr[blk_row];
00232       if ((ierr=A.putBlockRow(update[blk_row], numNzBlks[blk_row], row_vals, 
00233               blk_col_inds)))
00234        {
00235          printf("Row %d:",update[row]);
00236          perror1("Error putting block row:",ierr);
00237        }
00238     }
00239   
00240   if ((ierr=A.FillComplete()))
00241       perror1("Error in DVBR_Matrix_FillComplete:",ierr);
00242 
00243 #else
00244   /* Make numNzBlks - number of block entries in each block row */
00245 
00246   numNz = new int[numLocalEquations];
00247   for (i=0; i<numLocalEquations; i++) numNz[i] = bindx[i+1] - bindx[i];
00248 
00249   /* Make ColInds - Exactly bindx, offset by diag (just copy pointer) */
00250   ColInds = bindx+numLocalEquations+1;
00251 
00252   Petra_Map& map = *new Petra_Map(numGlobalEquations, numLocalEquations, 
00253       update, 0, Comm);
00254  
00255   Comm.Barrier();
00256   Petra_Time & FillTimer = *new Petra_Time(Comm);
00257   Petra_RDP_CRS_Matrix& A = *new Petra_RDP_CRS_Matrix(Copy, map, numNz);
00258   
00259   /* Add  rows one-at-a-time */
00260 
00261   for (row=0; row<numLocalEquations; row++)
00262     {
00263       row_vals = val + bindx[row];
00264       col_inds = bindx + bindx[row];
00265       numEntries = bindx[row+1] - bindx[row];
00266      if ((ierr = A.InsertGlobalValues(update[row], numEntries, row_vals, col_inds)))
00267        {
00268          printf("Row %d:",update[row]);
00269          perror1("Error putting row:",ierr);
00270        }
00271      if ((ierr=(A.InsertGlobalValues(update[row], 1, val+row, update+row)<0)))
00272        perror1("Error putting  diagonal",ierr);
00273     }
00274    Comm.Barrier();
00275    double FillTime = FillTimer.ElapsedTime();
00276     if ((ierr=A.FillComplete()))    
00277     perror1("Error in Petra_RDP_Vector_fillComplete",ierr);
00278    double FillCompleteTime = FillTimer.ElapsedTime() - FillTime;
00279    if (Comm.MyPID()==0) {
00280      cout << "\n\n****************************************************" << endl;
00281      cout << "\n\nMatrix construction time (sec) = " << FillTime<< endl;
00282      cout << "    Matrix FillComplete time (sec) = " << FillCompleteTime << endl;
00283      cout << "    Total construction time (sec) = " << FillTime+FillCompleteTime << endl<< endl;
00284    }
00285 
00286   
00287    
00288 #endif
00289   
00290 #ifdef MULTI_VECTOR
00291 
00292   // Make a second x and b vector that are 2x original x and b; make into a 2-vector.
00293 
00294   int nrhs = 2;
00295   double **allx = new double*[nrhs];
00296   double *xexact2 = new double[numLocalEquations];
00297   for (i = 0;i<numLocalEquations; i++) xexact2[i] = 2.0 * xexact[i];
00298   allx[0] = xexact; allx[1] = xexact2;
00299 
00300   double **allb = new double*[nrhs];
00301   double *b2 = new double[numLocalEquations];
00302   for (i = 0;i<numLocalEquations; i++) b2[i] = 2.0 * b[i];
00303   allb[0] = b; allb[1] = b2;
00304 
00305   double **allbt = new double*[nrhs];
00306   double *bt2 = new double[numLocalEquations];
00307   for (i = 0;i<numLocalEquations; i++) bt2[i] = 2.0 * bt[i];
00308   allbt[0] = bt; allbt[1] = bt2;
00309 
00310   Petra_RDP_MultiVector& xtrue = *new Petra_RDP_MultiVector(Copy, map, allx, nrhs);
00311   Petra_RDP_MultiVector& bexact = *new Petra_RDP_MultiVector(Copy, map, allb, nrhs);
00312   Petra_RDP_MultiVector& btexact = *new Petra_RDP_MultiVector(Copy, map, allbt, nrhs);
00313   Petra_RDP_MultiVector& bcomp = *new Petra_RDP_MultiVector(map, nrhs);
00314   Petra_RDP_MultiVector& xcomp = *new Petra_RDP_MultiVector(map, nrhs);
00315   Petra_RDP_MultiVector& resid = *new Petra_RDP_MultiVector(map, nrhs);
00316 
00317 #else
00318   int nrhs = 1;
00319   Petra_RDP_Vector& xtrue = *new Petra_RDP_Vector(Copy, map, xexact);
00320   Petra_RDP_Vector& bexact = *new Petra_RDP_Vector(Copy, map, b);
00321   Petra_RDP_Vector& btexact = *new Petra_RDP_Vector(Copy, map, bt);
00322   Petra_RDP_Vector& bcomp = *new Petra_RDP_Vector(map);
00323   Petra_RDP_Vector& xcomp = *new Petra_RDP_Vector(map);
00324   Petra_RDP_Vector& resid = *new Petra_RDP_Vector(map);
00325     
00326 #endif /* MULTI_VECTOR */
00327 
00328   Comm.Barrier();
00329 
00330   // Construct ILU preconditioner
00331 
00332   double elapsed_time, total_flops, MFLOPs;
00333   Petra_Time & timer = *new Petra_Time(Comm);
00334 
00335   int LevelFill = atoi(argv[argc-3]);
00336   if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
00337   double ShiftValue = atof(argv[argc-2]);
00338   if (verbose) cout << "Using Diagonal Shift Value of = " << ShiftValue << endl;
00339   int NumThreads = atoi(argv[argc-1]);
00340   if (verbose) cout << "Using " << NumThreads << " Threads "  << endl;
00341 
00342   Ifpack_RDP_CRS_RILUK * ILUK = 0;
00343   Ifpack_ILUK_Graph * ILUK_Graph = 0;
00344   if (LevelFill>-1) {
00345     elapsed_time = timer.ElapsedTime();
00346     ILUK_Graph = new Ifpack_ILUK_Graph(A.Graph(), LevelFill);
00347     assert(ILUK_Graph->ConstructFilledGraph()==0);
00348     assert(ILUK_Graph->ComputeLevels(NumThreads)==0);
00349     elapsed_time = timer.ElapsedTime() - elapsed_time;
00350     if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;
00351     
00352     return 0;
00353     
00354     elapsed_time = timer.ElapsedTime();
00355     ILUK = new Ifpack_RDP_CRS_RILUK(A, *ILUK_Graph);
00356     ILUK->SetShiftValue(ShiftValue);
00357     assert(ILUK->InitValues()==0);
00358     assert(ILUK->Factor()==0);
00359     elapsed_time = timer.ElapsedTime() - elapsed_time;
00360     total_flops = ILUK->Flops();
00361     MFLOPs = total_flops/elapsed_time/1000000.0;
00362     if (verbose) cout << "Time to compute preconditioner values = " 
00363           << elapsed_time << endl
00364           << "MFLOPS for Factorization = " << MFLOPs << endl;
00365   }
00366 
00367   int Maxiter = 500;
00368   double Tolerance = 1.0E-14;
00369   double * residual = new double[nrhs];
00370 
00371 
00372   elapsed_time = timer.ElapsedTime();
00373 
00374   BiCGSTAB(A, xcomp, bexact, ILUK, Maxiter, Tolerance, residual, total_flops, verbose);
00375 
00376   elapsed_time = timer.ElapsedTime() - elapsed_time;
00377   MFLOPs = total_flops/elapsed_time/1000000.0;
00378   if (verbose) cout << "Time to compute solution = " 
00379         << elapsed_time << endl 
00380         << "Number of operations in solve = " << total_flops << endl
00381         << "MFLOPS for Solve = " << MFLOPs<< endl << endl;
00382   
00383 
00384   int NumMVs = 100;
00385   int ii, iii;
00386 
00387   for (ii=0; ii<2; ii++) {
00388     bool TransA = (ii==1);
00389     A.ResetFlops();
00390     elapsed_time = timer.ElapsedTime();
00391     for (iii=0; iii<NumMVs; iii++)
00392       if ((ierr=A.Multiply(TransA, xcomp, bcomp))) perror1("Error in matvec",ierr);
00393     elapsed_time = timer.ElapsedTime() - elapsed_time;
00394     total_flops = A.Flops();
00395     MFLOPs = total_flops/elapsed_time/1000000.0;
00396     if (Comm.MyPID()==0) {
00397       if (TransA) {
00398   cout << "\n\n****************************************************" << endl;
00399   cout << "\n\nResults for transpose multiply with standard storage" << endl;
00400       }
00401       else {
00402   cout << "\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs 
00403        << " Matrix-Multiplies " << endl;
00404   cout << "\n\n*******************************************************" << endl;
00405   cout << "\n\nResults for no transpose multiply with standard storage" << endl;
00406       }
00407       
00408       cout << "\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs 
00409      << " Matrix-Multiplies " << endl;
00410       cout << "\n\nTotal FLOPS for standard Storage (" <<NumMVs<< " Multiplies) = " 
00411      << total_flops<< endl;
00412       cout << "    Total time for standard Storage = " << elapsed_time<< endl;
00413       cout << "    Total MFLOPs for standard matrix multiply = " << MFLOPs << endl<< endl;
00414     }
00415     
00416     // cout << "Vector bcomp" << bcomp << endl;
00417     
00418     if (TransA) {
00419       if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
00420     else {
00421       if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
00422     
00423     if ((ierr = resid.Norm2(residual))) perror1("Error in Norm2",ierr);
00424     
00425     if (Comm.MyPID()==0)
00426       for (i=0; i< nrhs; i++) printf("Residual[%d]    = %22.16g\n",i,residual[i]);
00427 
00428   }
00429 
00430    // Optimize data layout for memory access
00431 
00432   if ((ierr=A.OptimizeStorage())) perror1("Error in Petra_RDP_CRS_Matrix: OptimizeStorage",ierr);
00433 
00434   for (ii=0; ii<2; ii++) {
00435     bool TransA = (ii==1);
00436     A.ResetFlops();
00437     elapsed_time = timer.ElapsedTime();
00438     for (iii=0; iii<NumMVs; iii++)
00439       if ((ierr=A.Multiply(TransA, xcomp, bcomp)))
00440   perror1("Error in Multiply",ierr);
00441     elapsed_time = timer.ElapsedTime() - elapsed_time;
00442     total_flops = A.Flops();
00443     MFLOPs = total_flops/elapsed_time/1000000.0;
00444     if (Comm.MyPID()==0) {
00445       cout << "\n\n****************************************************" << endl;
00446       if (TransA) cout << "\n\nResults for transpose multiply with optimized storage" << endl;
00447       else cout << "\n\nResults for no transpose multiply with optimized storage"<< endl;
00448       cout << "\n\nTotal FLOPS for optimized storage (" <<NumMVs<< " Multiplies) = " 
00449      << total_flops<< endl;
00450       cout << "    Total time for optimized Storage = " << elapsed_time<< endl;
00451       cout << "    Total MFLOPs for optimized matrix multiply = " << MFLOPs << endl<< endl;
00452     }
00453     
00454     if (TransA) {
00455       if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
00456     else {
00457       if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr); }
00458 
00459     if ((ierr = resid.Norm2(residual))) perror1("Error in Norm2",ierr);
00460 
00461     if (Comm.MyPID()==0)
00462       for (i=0; i< nrhs; i++) printf("Residual[%d]    = %22.16g\n",i,residual[i]);
00463 
00464   }
00465 
00466   free ((void *) xguess);
00467   free ((void *) b);
00468   free ((void *) bt);
00469   free ((void *) xexact);
00470   free ((void *) val);
00471   free ((void *) bindx);
00472   free ((void *) update);
00473 
00474 #ifdef VBRMATRIX
00475   free ((void *) indx);
00476   free ((void *) rpntr);
00477   free ((void *) bpntr);
00478   delete [] blockSizes;
00479   delete [] numNzBlks;
00480 #else
00481   delete [] numNz;
00482 
00483 #endif
00484 
00485 #ifdef MULTI_VECTOR
00486   delete [] xexact2;
00487   delete [] b2;
00488   delete [] allx;
00489   delete [] allb;
00490 #endif
00491   delete &bexact;
00492   delete &bcomp;
00493   delete &resid;
00494   delete &xcomp;
00495   delete ILUK;
00496   delete &ILUK_Graph;
00497   delete &A;
00498   delete &map;
00499   delete &timer;
00500   delete &FillTimer;
00501   delete &Comm;
00502 
00503   delete proc_config;
00504                
00505 #ifdef PETRA_MPI
00506   MPI_Finalize() ;
00507 #endif
00508 
00509 return 0 ;
00510 }
00511 void BiCGSTAB(Petra_RDP_CRS_Matrix &A, 
00512         Petra_RDP_Vector &x, 
00513         Petra_RDP_Vector &b, 
00514         Ifpack_RDP_CRS_RILUK *M, 
00515         int Maxiter, 
00516         double Tolerance, 
00517         double *residual, double & FLOPS, bool verbose) {
00518 
00519   // Allocate vectors needed for iterations
00520   Petra_RDP_Vector& phat = *new Petra_RDP_Vector(x); phat.PutScalar(0.0);
00521   Petra_RDP_Vector& p = *new Petra_RDP_Vector(x); p.PutScalar(0.0);
00522   Petra_RDP_Vector& shat = *new Petra_RDP_Vector(x); shat.PutScalar(0.0);
00523   Petra_RDP_Vector& s = *new Petra_RDP_Vector(x); s.PutScalar(0.0);
00524   Petra_RDP_Vector& r = *new Petra_RDP_Vector(x); r.PutScalar(0.0);
00525   Petra_RDP_Vector& rtilde = *new Petra_RDP_Vector(x); rtilde.Random();
00526   Petra_RDP_Vector& v = *new Petra_RDP_Vector(x); v.PutScalar(0.0);
00527   
00528 
00529   A.Multiply(false, x, r); // r = A*x
00530 
00531   r.Update(1.0, b, -1.0); // r = b - A*x
00532 
00533   double r_norm, b_norm, scaled_r_norm, rhon, rhonm1 = 1.0;
00534   double alpha = 1.0, omega = 1.0, sigma;
00535   double omega_num, omega_den;
00536   r.Norm2(&r_norm);
00537   b.Norm2(&b_norm);
00538   scaled_r_norm = r_norm/b_norm;
00539   r.Dot(rtilde,&rhon);
00540 
00541   if (verbose) cout << "Initial residual = " << r_norm
00542         << " Scaled residual = " << scaled_r_norm << endl;
00543 
00544 
00545   for (int i=0; i<Maxiter; i++) { // Main iteration loop   
00546 
00547     double beta = (rhon/rhonm1) * (alpha/omega);
00548     rhonm1 = rhon;
00549 
00550     /* p    = r + beta*(p - omega*v)       */
00551     /* phat = M^-1 p                       */
00552     /* v    = A phat                       */
00553 
00554     double dtemp = - beta*omega;
00555 
00556     p.Update(1.0, r, dtemp, v, beta);
00557     if (M==0) 
00558       phat.Scale(1.0, p);
00559     else
00560       M->LevelSolve(false, p, phat);
00561     A.Multiply(false, phat, v);
00562 
00563     
00564     rtilde.Dot(v,&sigma);
00565     alpha = rhon/sigma;    
00566 
00567     /* s = r - alpha*v                     */
00568     /* shat = M^-1 s                       */
00569     /* r = A shat (r is a tmp here for t ) */
00570 
00571     s.Update(-alpha, v, 1.0, r, 0.0);
00572     if (M==0) 
00573       shat.Scale(1.0, s);
00574     else
00575       M->LevelSolve(false, s, shat);
00576     A.Multiply(false, shat, r);
00577 
00578     r.Dot(s, &omega_num);
00579     r.Dot(r, &omega_den);
00580     omega = omega_num / omega_den;
00581 
00582     /* x = x + alpha*phat + omega*shat */
00583     /* r = s - omega*r */
00584 
00585     x.Update(alpha, phat, omega, shat, 1.0);
00586     r.Update(1.0, s, -omega); 
00587     
00588     r.Norm2(&r_norm);
00589     scaled_r_norm = r_norm/b_norm;
00590     r.Dot(rtilde,&rhon);
00591 
00592     if (verbose) cout << "Iter "<< i << " residual = " << r_norm
00593           << " Scaled residual = " << scaled_r_norm << endl;
00594 
00595     if (scaled_r_norm < Tolerance) break;
00596   }
00597 
00598   FLOPS = phat.Flops()+p.Flops()+shat.Flops()+s.Flops()+r.Flops()+rtilde.Flops()+
00599           v.Flops()+A.Flops()+x.Flops()+b.Flops();
00600   if (M!=0) FLOPS += M->Flops();
00601 
00602   delete &phat;
00603   delete &p;
00604   delete &shat;
00605   delete &s;
00606   delete &r;
00607   delete &rtilde;
00608   delete &v;
00609 
00610 
00611   return;
00612 }

Generated on Thu Sep 18 12:37:21 2008 for Ifpack Package Browser (Single Doxygen Collection) by doxygen 1.3.9.1