BelosBlockGmres.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 //
00029 // This file contains an implementation of the Block GMRES algorithm
00030 // for solving real nonsymmetric linear systems of equations AX = B,
00031 // where B is a matrix containing one or more right-hand sides, and X is
00032 // the matrix of corresponding solutions. This implementation allows the 
00033 // user to solve systems involving any number of right-hand sides. The
00034 // block size used in the solver is user specified, and is independent
00035 // of the number of right-hand sides. Thus, a system involving many 
00036 // right-hand sides can be processed by solving for only some number  
00037 // (the block size) of the right-hand sides simultaneously. Several passes
00038 // through the block solver are used to solve for all of them. A single
00039 // right-hand side system can be solved in the traditional way by choosing
00040 // the block size equal to one, or it can be solved using a block 
00041 // implementation (choosing a block size greater than one).
00042 //   
00043 //
00044 #ifndef BELOS_BLOCK_GMRES_HPP
00045 #define BELOS_BLOCK_GMRES_HPP
00046 
00053 #include "BelosConfigDefs.hpp"
00054 #include "BelosIterativeSolver.hpp"
00055 #include "BelosLinearProblem.hpp"
00056 #include "BelosOutputManager.hpp"
00057 #include "BelosStatusTest.hpp"
00058 #include "BelosOperatorTraits.hpp"
00059 #include "BelosMultiVecTraits.hpp"
00060 
00061 #include "Teuchos_BLAS.hpp"
00062 #include "Teuchos_LAPACK.hpp"
00063 #include "Teuchos_SerialDenseMatrix.hpp"
00064 #include "Teuchos_SerialDenseVector.hpp"
00065 #include "Teuchos_ScalarTraits.hpp"
00066 #include "Teuchos_ParameterList.hpp"
00067 #include "Teuchos_TimeMonitor.hpp"
00068 
00069 using Teuchos::ParameterList;
00070 using Teuchos::RefCountPtr;
00071 
00083 namespace Belos {
00084   
00085   template <class ScalarType, class MV, class OP>
00086   class BlockGmres : public IterativeSolver<ScalarType,MV,OP> { 
00087   public:
00088     
00089     //
00090     // Convenience typedefs
00091     //
00092     typedef MultiVecTraits<ScalarType,MV> MVT;
00093     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00094     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00095     typedef typename SCT::magnitudeType MagnitudeType;
00096     
00099 
00100     BlockGmres(const RefCountPtr<LinearProblem<ScalarType,MV,OP> > &lp, 
00101          const RefCountPtr<StatusTest<ScalarType,MV,OP> > &stest,
00102                const RefCountPtr<OutputManager<ScalarType> > &om,
00103          const RefCountPtr<ParameterList> &pl
00104          );
00105     
00107     virtual ~BlockGmres() {};
00109     
00112     
00114     int GetNumIters() const { return( _totaliter ); }
00115     
00117     int GetNumRestarts() const { return( _restartiter ); }
00118     
00120 
00123     RefCountPtr<const MV> GetNativeResiduals( std::vector<MagnitudeType> *normvec ) const;
00124     
00126 
00131     RefCountPtr<MV> GetCurrentSoln();
00132     
00134 
00136     RefCountPtr<LinearProblem<ScalarType,MV,OP> > GetLinearProblem() const { return( _lp ); }
00137     
00139     RefCountPtr<StatusTest<ScalarType,MV,OP> > GetStatusTest() const { return( _stest ); }
00140 
00142     int Reset( const RefCountPtr<ParameterList>& pl = null );
00143     
00145 
00148     
00153     void Solve();
00154 
00156 
00159 
00161     std::string description() const;
00162 
00164     
00165   private:
00166 
00168     void SetGmresBlkTols();
00169 
00171     bool BlockReduction(bool&);
00172 
00174     bool QRFactorAug(MV&, 
00175          Teuchos::SerialDenseMatrix<int,ScalarType>&,
00176          bool);
00177 
00179     bool BlkOrth(MV&);
00180 
00182     bool BlkOrthSing(MV&);
00183 
00185     void UpdateLSQR(Teuchos::SerialDenseMatrix<int,ScalarType>&,Teuchos::SerialDenseMatrix<int,ScalarType>&);
00186 
00188     void CheckKrylovOrth(const int);
00189 
00191     RefCountPtr<LinearProblem<ScalarType,MV,OP> > _lp; 
00192 
00194     RefCountPtr<StatusTest<ScalarType,MV,OP> > _stest; 
00195 
00197     RefCountPtr<OutputManager<ScalarType> > _om;
00198 
00200     RefCountPtr<ParameterList> _pl;     
00201 
00203     RefCountPtr<MV> _basisvecs;
00204 
00206     RefCountPtr<MV> _z_basisvecs; 
00207 
00209     RefCountPtr<MV> _cur_block_rhs, _cur_block_sol;
00210 
00212     Teuchos::SerialDenseMatrix<int,ScalarType> _hessmatrix;
00213 
00215     Teuchos::SerialDenseMatrix<int,ScalarType> _z;
00216 
00218     RefCountPtr<ostream> _os;
00219     
00221     int _length;
00222 
00224     int _blocksize, _restartiter, _totaliter, _iter;
00225 
00227     MagnitudeType _dep_tol, _blk_tol, _sing_tol;
00228 
00230     bool _flexible;
00231 
00233     Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
00234     Teuchos::SerialDenseVector<int,MagnitudeType> cs;
00235 
00237     bool _restartTimers;
00238     
00240     Teuchos::RefCountPtr<Teuchos::Time> _timerOp, _timerPrec, 
00241                                         _timerOrtho, _timerTotal;
00242   };
00243 
00244   //
00245   // Implementation
00246   //
00247 
00248   //
00249   // Note: I should define a copy constructor and overload = because of the use of new
00250   //
00251   template <class ScalarType, class MV, class OP>
00252   BlockGmres<ScalarType,MV,OP>::BlockGmres(const RefCountPtr< LinearProblem<ScalarType,MV,OP> >& lp, 
00253              const RefCountPtr< StatusTest<ScalarType,MV,OP> >& stest,
00254              const RefCountPtr< OutputManager<ScalarType> >& om,
00255              const RefCountPtr< ParameterList > &pl ):
00256     _lp(lp),
00257     _stest(stest),
00258     _om(om),
00259     _pl(pl),
00260     _os(om->GetOStream()),
00261     _length(_pl->get("Length",25)),
00262     _blocksize(0), 
00263     _restartiter(0), 
00264     _totaliter(0),
00265     _iter(0),
00266     _dep_tol(1.0),
00267     _blk_tol(1.0),
00268     _sing_tol(1.0),
00269     _flexible( (_pl->isParameter("Variant"))&&(Teuchos::getParameter<std::string>(*_pl, "Variant")=="Flexible") ),
00270     _restartTimers(true),
00271     _timerOp(Teuchos::TimeMonitor::getNewTimer("Operation Op*x")),
00272     _timerPrec(Teuchos::TimeMonitor::getNewTimer("Operation Prec*x")),
00273     _timerOrtho(Teuchos::TimeMonitor::getNewTimer("Orthogonalization")),
00274     _timerTotal(Teuchos::TimeMonitor::getNewTimer("Total time"))    
00275   {
00276     //
00277     // Set up the block orthogonality tolerances
00278     //
00279     SetGmresBlkTols();  
00280   }
00281     
00282   template <class ScalarType, class MV, class OP>
00283   void 
00284   BlockGmres<ScalarType,MV,OP>::SetGmresBlkTols() 
00285   {
00286     typedef typename Teuchos::ScalarTraits<MagnitudeType> MGT;
00287     const MagnitudeType two = 2.0;
00288     const MagnitudeType eps = SCT::eps();
00289     _dep_tol = MGT::one()/MGT::squareroot(two);
00290     _blk_tol = 10*sqrt(eps);
00291     _sing_tol = 10 * eps;
00292   }
00293   
00294   template <class ScalarType, class MV, class OP>
00295   RefCountPtr<const MV> 
00296   BlockGmres<ScalarType,MV,OP>::GetNativeResiduals( std::vector<MagnitudeType> *normvec ) const 
00297   {
00298     //
00299     // If this is the first iteration for a new right-hand side return the
00300     // residual for the current block rhs and solution.
00301     // NOTE: Make sure the incoming vector is the correct size!
00302     //
00303     if ( normvec && (int)normvec->size() < _blocksize )                         
00304       normvec->resize( _blocksize );                                          
00305 
00306     if (_totaliter == 0) {
00307       RefCountPtr<MV> temp_res = MVT::Clone( *_cur_block_rhs, _blocksize );
00308       _lp->ComputeResVec( &*temp_res, &*_cur_block_sol, &*_cur_block_rhs);
00309       MVT::MvNorm( *temp_res, normvec );
00310       return temp_res;
00311     } else {
00312       if (normvec) {
00313         Teuchos::BLAS<int,ScalarType> blas;
00314         for (int j=0; j<_blocksize; j++)
00315           (*normvec)[j] = blas.NRM2( _blocksize, &_z(_iter*_blocksize, j ), 1);
00316       }
00317     }
00318     return null;
00319   }
00320 
00321   template <class ScalarType, class MV, class OP>
00322   RefCountPtr<MV> 
00323   BlockGmres<ScalarType,MV,OP>::GetCurrentSoln()
00324   {    
00325     //
00326     // If this is the first iteration of the Arnoldi factorization, 
00327     // return the current solution.  It has either been updated recently, 
00328     // if there was a restart, or we haven't computed anything yet.
00329     //
00330     RefCountPtr<MV> cur_sol_copy = MVT::CloneCopy(*_cur_block_sol);
00331     if (_iter==0) { 
00332         return cur_sol_copy; 
00333     } else {
00334       const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00335       int i, m = _iter*_blocksize;
00336       Teuchos::BLAS<int,ScalarType> blas;
00337       //
00338       //  Make a view and then copy the RHS of the least squares problem.  DON'T OVERWRITE IT!
00339       //
00340       Teuchos::SerialDenseMatrix<int,ScalarType> y( Teuchos::Copy, _z, m, _blocksize );
00341       //
00342       //  Solve the least squares problem and compute current solutions.
00343       //
00344       blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
00345          Teuchos::NON_UNIT_DIAG, m, _blocksize, one,  
00346          _hessmatrix.values(), _hessmatrix.stride(), y.values(), y.stride() );
00347     
00348       std::vector<int> index(m);
00349       for ( i=0; i<m; i++ ) {   
00350         index[i] = i;
00351       }
00352       if (_flexible) {
00353   RefCountPtr<const MV> Zjp1 = MVT::CloneView( *_z_basisvecs, index );
00354   MVT::MvTimesMatAddMv( one, *Zjp1, y, one, *cur_sol_copy );
00355       } 
00356       else {
00357   RefCountPtr<const MV> Vjp1 = MVT::CloneView( *_basisvecs, index );
00358   MVT::MvTimesMatAddMv( one, *Vjp1, y, one, *cur_sol_copy );
00359       }
00360     }
00361     return cur_sol_copy;
00362   }
00363     
00364   template <class ScalarType, class MV, class OP>
00365   void 
00366   BlockGmres<ScalarType,MV,OP>::Solve() 
00367   {
00368     Teuchos::TimeMonitor LocalTimer(*_timerTotal,_restartTimers);
00369 
00370     if ( _restartTimers ) {
00371       _timerOp->reset();
00372       _timerPrec->reset();
00373       _timerOrtho->reset();
00374     }
00375 
00376     int i=0;
00377     std::vector<int> index;
00378     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00379     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00380     Teuchos::RefCountPtr<MV> U_vec;
00381     bool dep_flg = false, ortho_flg = false;
00382     bool restart_flg = true;
00383     Teuchos::LAPACK<int, ScalarType> lapack;
00384     Teuchos::BLAS<int, ScalarType> blas;
00385     //
00386     // Obtain the output stream from the OutputManager.
00387     //
00388     _os = _om->GetOStream();
00389     //
00390     // Obtain the first block linear system form the linear problem manager.
00391     //
00392     _cur_block_sol = _lp->GetCurrLHSVec();
00393     _cur_block_rhs = _lp->GetCurrRHSVec();
00394     //
00395     //  Start executable statements. 
00396     //
00397     while ( _cur_block_rhs.get() && _cur_block_sol.get() ) {
00398       //
00399       // Get the blocksize from the linear problem
00400       //
00401       _blocksize = _lp->GetCurrBlockSize();
00402       //
00403       if (_om->isVerbosityAndPrint( IterationDetails )) {
00404         *_os << endl;
00405         *_os << "===================================================" << endl;
00406         *_os << "Solving linear system(s):  "
00407              << _lp->GetRHSIndex() << " through " << _lp->GetRHSIndex()+_lp->GetNumToSolve()-1
00408              << "  [block size = " << _blocksize << "]\n";
00409         *_os << endl;
00410       }
00411       //
00412       // Reset the iteration counter for this block of right-hand sides.
00413       //
00414       _totaliter = 0;
00415       //
00416       // Make room for the Arnoldi vectors and F.
00417       //
00418       _basisvecs = MVT::Clone(*_cur_block_rhs,(_length+1)*_blocksize);
00419       if (_flexible)
00420         _z_basisvecs = MVT::Clone(*_cur_block_rhs, (_length+1)*_blocksize);
00421       //
00422       // Create the rectangular Hessenberg matrix and right-hand side of least squares problem.
00423       //
00424       _hessmatrix.shape((_length+1)*_blocksize, _length*_blocksize);
00425       _z.shape((_length+1)*_blocksize, _blocksize); 
00426       //
00427       //
00428       for ( _restartiter=0; _stest->CheckStatus(this) == Unconverged && restart_flg; ++_restartiter ) {
00429   //
00430   // Associate the initial block of _basisvecs with U_vec
00431   // Reset the index vector (this might have been changed if there was a restart)
00432   //
00433   index.resize(_blocksize);
00434   for (i=0; i < _blocksize; i++) { index[i] = i; }
00435   U_vec = MVT::CloneView( *_basisvecs, index );
00436   //
00437   // Compute current residual and place into 1st block
00438   //
00439   _lp->ComputeResVec( &*U_vec, &*_cur_block_sol, &*_cur_block_rhs );
00440   //
00441   // Reset orthogonalization failure flags
00442   //
00443   dep_flg = false; ortho_flg = false;
00444   //
00445   // Re-initialize RHS of the least squares system and create a view.
00446   //
00447   _z.putScalar();
00448   Teuchos::SerialDenseMatrix<int,ScalarType> G10(Teuchos::View, _z, _blocksize, _blocksize);
00449   ortho_flg = QRFactorAug( *U_vec, G10, true );
00450   //
00451   if (ortho_flg){
00452     if (_om->isVerbosityAndPrint( Errors )){
00453       *_os << "Exiting Block GMRES" << endl;
00454       *_os << "  Restart iteration# " << _restartiter
00455      << "  Iteration# " << _iter << endl;
00456       *_os << "  ERROR: Failed to compute initial block of orthonormal basis vectors"
00457      << endl << endl;
00458     }
00459   }
00460   //
00461   // NOTE:  U_vec is a view into the set of basis vectors (_basisvecs), which is not guaranteed to
00462   // be updated when U_vec is changed.  Thus, to ensure consistency between U_vec and _basisvecs,
00463   // U_vec must be destroyed at this point!
00464   //
00465   if (U_vec.get()) {U_vec == null;}
00466   //
00467   //  
00468   for (_iter=0; _iter<_length && _stest->CheckStatus(this) == Unconverged && !ortho_flg; ++_iter, ++_totaliter) {
00469     //
00470     // Compute a length _length block Arnoldi Reduction (one step at a time),
00471     // the exit_flg indicates if we cannot extend the Arnoldi Reduction.
00472           // If exit_flg is true, then we need to leave this loop and compute the latest solution.
00473           //
00474     // NOTE:  There's an exception here if the blocksize is equal to one, then a lucky
00475     // breakdown has occurred, so we should update the least-squares problem.
00476     //
00477     //dep_flg = true;
00478     ortho_flg = BlockReduction(dep_flg);
00479     if (ortho_flg && _blocksize > 1){ 
00480       break;
00481     }
00482     //
00483     // Update the new Least-Squares solution through the QR factorization of the new
00484     // block in the upper Hessenberg matrix.
00485     //
00486     UpdateLSQR( _hessmatrix, _z );
00487     //
00488   } // end for (_iter=0;...
00489   //
00490   // Update the solutions by solving the triangular system to get the Krylov weights.
00491   //
00492         if (_iter) {
00493     //
00494     // Make a copy of _z since it may be used in the convergence test to compute native residuals.
00495     Teuchos::SerialDenseMatrix<int,ScalarType> _z_copy( Teuchos::Copy,_z, _iter*_blocksize, _blocksize ); 
00496     //
00497     blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 
00498          Teuchos::NON_UNIT_DIAG, _iter*_blocksize, _blocksize, one,
00499          _hessmatrix.values(), _hessmatrix.stride(), _z_copy.values(), _z_copy.stride() ); 
00500     //                                                                    
00501           // Update Solution.                                                   
00502     //                                                                      
00503           // 1)  For the flexible variant the solution will be updated directly,
00504     // Check status if _iter==_length                                       
00505           // otherwise the updated residual will be passed back to the linear problem.
00506     //                                                                      
00507           // 2)  Inform the linear problem that the solution was updated, pass updated residual if necessary.
00508     //
00509     index.resize( _iter*_blocksize );
00510     for (i=0; i < _iter*_blocksize; i++) { index[i] = i; }
00511     if (_flexible) {
00512       RefCountPtr<const MV> Zjp1 = MVT::CloneView( *_z_basisvecs, index );
00513       MVT::MvTimesMatAddMv( one, *Zjp1, _z_copy, one, *_cur_block_sol );
00514       _lp->SolutionUpdated();
00515     } else {
00516       RefCountPtr<const MV> Vjp1 = MVT::CloneView( *_basisvecs, index );
00517       RefCountPtr<MV> solnUpdate = MVT::Clone( *_cur_block_sol, _blocksize ); 
00518       MVT::MvTimesMatAddMv( one, *Vjp1, _z_copy, zero, *solnUpdate );
00519       //
00520       // Update the solution held by the linear problem.
00521       //    
00522       _lp->SolutionUpdated( solnUpdate.get() );
00523       //
00524     }
00525   } // if (_iter) 
00526   //
00527   // Check status if _iter==_length
00528   //
00529   if (_iter == _length) {
00530     _stest->CheckStatus(this);
00531   }
00532   //
00533   // Determine if we are going to allow a restart
00534   // NOTE:  We will try to restart if we actually computed a subspace of nontrivial dimension (iter!=0)
00535   //
00536   restart_flg = (_iter!=0 && restart_flg);
00537   if (ortho_flg && restart_flg) {
00538     if (_om->isVerbosityAndPrint( Warnings )) {
00539       *_os << "WARNING: Orthogonalization failure detected at local iteration "
00540      << _iter<<", total iteration "<<_totaliter<<", restart will be performed!\n";
00541     }
00542   } 
00543   //
00544   // Break out of this loop before the _restartiter is incremented if we are finished.
00545   //
00546         if ( _stest->GetStatus() != Unconverged || !restart_flg ) { break; }
00547         //
00548       } // end for (_restartiter=0;...
00549       //
00550       // Print out solver status
00551       //
00552       if (_om->isVerbosityAndPrint( FinalSummary )) {
00553         *_os << endl;
00554         _stest->Print(*_os);
00555         if (ortho_flg && _stest->GetStatus()!=Converged) {
00556           *_os << " Exiting Block GMRES --- " << endl;
00557           *_os << "  ERROR: Failed to compute new block of orthonormal basis vectors" << endl;
00558           *_os << "  ***Solution from previous step will be returned***"<< endl<< endl;
00559         }
00560       } 
00561       //
00562       // Inform the linear problem that we are finished with this block linear system.
00563       //    
00564       _lp->SetCurrLSVec();
00565       //
00566       // Obtain the next block linear system from the linear problem manager.
00567       //
00568       _cur_block_sol = _lp->GetCurrLHSVec();
00569       _cur_block_rhs = _lp->GetCurrRHSVec();
00570       //
00571     } // end while( _cur_block_sol && _cur_block_rhs )
00572     //
00573     // Print timing details 
00574 
00575     // Stop timer.
00576     _timerTotal->stop();
00577 
00578     // Reset format that will be used to print the summary
00579     Teuchos::TimeMonitor::format().setPageWidth(54);
00580 
00581     if (_om->isVerbosity( Belos::TimingDetails )) {
00582       if (_om->doPrint())
00583         *_os <<"********************TIMING DETAILS********************"<<endl;
00584       Teuchos::TimeMonitor::summarize( *_os );
00585       if (_om->doPrint())
00586         *_os <<"******************************************************"<<endl;
00587     }
00588   } // end Solve()
00589   
00590   
00591   template<class ScalarType, class MV, class OP>
00592   int 
00593   BlockGmres<ScalarType,MV,OP>::Reset( const RefCountPtr<ParameterList>& pl )
00594   {
00595     // Set new parameter list if one is passed in.
00596     if (pl.get() != 0 )  
00597       _pl = pl;
00598     _length = _pl->get("Length",25);
00599     _blocksize = 0;
00600     _restartiter = 0; 
00601     _totaliter = 0;
00602     _iter = 0;
00603     //
00604     // If there is a "Variant" parameter in the list, check to see if it's "Flexible" (i.e. flexible GMRES)
00605     //
00606     if (_pl->isParameter("Variant"))
00607       _flexible = (Teuchos::getParameter<std::string>(*_pl, "Variant")=="Flexible");
00608     return 0;
00609   }
00610 
00611   template<class ScalarType, class MV, class OP>
00612   bool 
00613   BlockGmres<ScalarType,MV,OP>::BlockReduction ( bool& dep_flg ) 
00614   {
00615     //
00616     int i;  
00617     std::vector<int> index( _blocksize );
00618     RefCountPtr<MV> AU_vec = MVT::Clone( *_basisvecs,_blocksize );
00619     //
00620     // Associate the j-th block of _basisvecs with U_vec.
00621     //
00622     for ( i=0; i<_blocksize; i++ ) {
00623       index[i] = _iter*_blocksize+i;
00624     }
00625     RefCountPtr<MV> U_vec = MVT::CloneView( *_basisvecs, index );
00626     //                                                                          
00627     // If this is the flexible variant apply operator separately, else apply composite operator.
00628     //                                                                          
00629     if (_flexible) {                                                            
00630       //
00631       RefCountPtr<MV> Z_vec = MVT::CloneView( *_z_basisvecs, index );             
00632       //                                                                        
00633       //  Apply right preconditioning and store it in _z_basisvecs.             
00634       //                                                                        
00635       {
00636   Teuchos::TimeMonitor PrecTimer(*_timerPrec);
00637   _lp->ApplyRightPrec( *U_vec, *Z_vec );                                    
00638       }
00639       //                                                                        
00640       //  Apply operator and store it in AU_vec.                                
00641       //                                                                        
00642       {
00643   Teuchos::TimeMonitor OpTimer(*_timerOp);
00644   _lp->ApplyOp( *Z_vec, *AU_vec );                                          
00645       }
00646     } 
00647     else                                                                      
00648       { 
00649   Teuchos::TimeMonitor OpTimer(*_timerOp);
00650   _lp->Apply( *U_vec, *AU_vec ); 
00651       }
00652     //
00653     //  Orthogonalize the new block in the Krylov expansion and check for dependencies.
00654     //
00655     bool dep = false;
00656     if (!dep_flg){
00657       Teuchos::TimeMonitor OrthoTimer(*_timerOrtho);
00658       dep = BlkOrth(*AU_vec);
00659       if (dep) {
00660   dep_flg = true;
00661   if (_blocksize == 1) {
00662     return dep_flg;
00663   }
00664       }
00665     }
00666     // If any dependencies have been detected during this step of
00667     // Block Reduction, or any previous steps (within the construction
00668     // of the current Krylov subspaces), block orthogonalization is 
00669     // implemented with a variant of A. Ruhe's approach.
00670     //
00671     bool flg = false;
00672     if (dep_flg){
00673       Teuchos::TimeMonitor OrthoTimer(*_timerOrtho);
00674       flg = BlkOrthSing(*AU_vec);
00675     }
00676     //
00677     return flg;
00678     //
00679   } // end BlockReduction()
00680   
00681   
00682   template<class ScalarType, class MV, class OP>
00683   bool BlockGmres<ScalarType,MV,OP>::BlkOrth( MV& VecIn ) 
00684   {
00685     //
00686     // Orthogonalization is first done between the new block of 
00687     // vectors and all previous blocks, then the vectors within the
00688     // new block are orthogonalized.
00689     //
00690     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00691     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00692     const int max_num_orth = 2;
00693     int i, k, row_offset, col_offset;
00694     std::vector<int> index( _blocksize );
00695     std::vector<MagnitudeType> norm1( _blocksize );
00696     std::vector<MagnitudeType> norm2( _blocksize );
00697     //
00698     // Initialize index vector.
00699     //
00700     for (i=0; i<_blocksize; i++) { index[i] = (_iter+1)*_blocksize + i; }
00701     //
00702     // Associate (j+1)-st block of ArnoldiVecs with F_vec.
00703     //
00704     RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
00705     //
00706     // Copy preconditioned AU_vec into (j+1)st block of _basisvecs
00707     //
00708     MVT::MvAddMv( one, VecIn, zero, VecIn, *F_vec );
00709     //
00710     // Zero out the full block column of the Hessenberg matrix 
00711     // even though we're only going to set the coefficients in 
00712     // rows [0:(j+1)*_blocksize-1]
00713     //
00714     int n_row = _hessmatrix.numRows();
00715     //
00716     for ( k=0; k<_blocksize; k++ ) {
00717       for ( i=0; i<n_row ; i++ ) {
00718         _hessmatrix(i,_iter*_blocksize+k) = zero;
00719       }
00720     }
00721     //
00722     // Grab all previous Arnoldi vectors
00723     //
00724     int num_prev = (_iter+1)*_blocksize;
00725     index.resize( num_prev );
00726     for (i=0; i<num_prev; i++) { index[i] = i; }
00727     RefCountPtr<MV> V_prev = MVT::CloneView( *_basisvecs, index );
00728     //
00729     // Create a matrix to store the product trans(V_prev)*F_vec
00730     //
00731     Teuchos::SerialDenseMatrix<int,ScalarType> dense_mat( num_prev, _blocksize );
00732     //
00733     MVT::MvNorm(*F_vec, &norm1);
00734     //
00735     // Perform two steps of block classical Gram-Schmidt so that
00736     // F_vec is orthogonal to the columns of V_prev.
00737     //
00738     for ( int num_orth=0; num_orth<max_num_orth; num_orth++ ) {
00739       //
00740       // Compute trans(V_prev)*F_vec and store in the j'th diagonal
00741       // block of the Hessenberg matrix
00742       //
00743       MVT::MvTransMv (one, *V_prev, *F_vec, dense_mat);
00744       //
00745       // Update the orthogonalization coefficients for the j-th block
00746       // column of the Hessenberg matrix.
00747       //
00748       for ( k=0; k<_blocksize; k++ ) {
00749         for ( i=0; i<num_prev; i++ ) {
00750           _hessmatrix(i,_iter*_blocksize+k) += dense_mat(i,k);
00751         }
00752       }
00753       //
00754       // F_vec <- F_vec - V(0:(j+1)*block-1,:) * H(0:num_prev-1,j:num_prev-1)
00755       //
00756       MVT::MvTimesMatAddMv( -one, *V_prev, dense_mat, one, *F_vec );
00757       //
00758     } // end for num_orth=0;...)
00759       //
00760     MVT::MvNorm( *F_vec, &norm2 );
00761     //
00762     // Check to make sure the new block of Arnoldi vectors are 
00763     // not dependent on previous Arnoldi vectors
00764     //
00765     bool flg = false; // This will get set true if dependencies are detected
00766     //
00767     for (i=0; i<_blocksize; i++){
00768       if (norm2[i] < norm1[i] * _blk_tol) {
00769   flg = true;
00770   if (_om->isVerbosityAndPrint( OrthoDetails )){
00771     *_os << "Col " << num_prev+i << " is dependent on previous "
00772          << "Arnoldi vectors in V_prev" << endl;
00773     *_os << endl;
00774   }
00775       }
00776     } // end for (i=0;...)
00777       //
00778     if (_om->isVerbosity( OrthoDetails )) {
00779       if(_om->doPrint()) { *_os << "Checking Orthogonality after BlkOrth()"
00780     << " Iteration: " << _iter << endl; }
00781       CheckKrylovOrth(_iter);
00782     }
00783     //
00784     // If dependencies have not already been detected, compute
00785     // the QR factorization of the next block. Otherwise,
00786     // this block of Arnoldi vectors will be re-computed via and 
00787     // implementation of A. Ruhe's block Arnoldi.
00788     //
00789     if (!flg) {
00790       //
00791       // Compute the QR factorization of F_vec
00792       //
00793       row_offset = (_iter+1)*_blocksize; col_offset = _iter*_blocksize;
00794       Teuchos::SerialDenseMatrix<int,ScalarType> sub_block_hess(Teuchos::View, _hessmatrix, _blocksize, _blocksize,
00795                 row_offset, col_offset);
00796       flg = QRFactorAug( *F_vec, sub_block_hess, false );
00797     }
00798     //
00799     return flg;
00800     //
00801   }  // end BlkOrth()
00802   
00803   
00804   template<class ScalarType, class MV, class OP>
00805   bool BlockGmres<ScalarType,MV,OP>::BlkOrthSing( MV& VecIn ) 
00806   {
00807     //
00808     // This is a variant of A. Ruhe's block Arnoldi
00809     // The orthogonalization of the vectors AU_vec is done
00810     // one at a time. If a dependency is detected, a random
00811     // vector is added and orthogonalized against all previous
00812     // Arnoldi vectors.
00813     // 
00814     const int IntOne = 1;
00815     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00816     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00817     Teuchos::SerialDenseVector<int,ScalarType> dense_vec;
00818     int i, k, num_orth;
00819     std::vector<int> index( _blocksize );
00820     std::vector<int> index2( IntOne );
00821     std::vector<MagnitudeType> nm1(IntOne);
00822     std::vector<MagnitudeType> nm2(IntOne);
00823     //
00824     // Initialize index vector.
00825     //
00826     for ( i=0; i<_blocksize; i++ ) { index[i] = (_iter+1)*_blocksize + i; }
00827     //
00828     // Associate (j+1)-st block of ArnoldiVecs with F_vec.
00829     //
00830     RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
00831     //
00832     // Copy preconditioned AU_vec into (j+1)st block of _basisvecs
00833     //
00834     MVT::MvAddMv( one, VecIn, zero, VecIn, *F_vec );
00835     //
00836     // Zero out the full block column of the Hessenberg matrix 
00837     //
00838     int n_row = _hessmatrix.numRows();
00839     //
00840     for ( k=0; k<_blocksize; k++ ) {
00841       for ( i=0; i<n_row ; i++ ) {
00842         _hessmatrix(i, _iter*_blocksize+k) = zero;
00843       }
00844     }
00845     //
00846     RefCountPtr<const MV> Q_vec;
00847     RefCountPtr<MV> q_vec, tptr;
00848     tptr = MVT::Clone( *F_vec, IntOne ); 
00849     //
00850     // Start a loop to orthogonalize each of the _blocksize
00851     // columns of F_vec against all previous _basisvecs
00852     //
00853     bool flg = false;
00854     //
00855     for (int num_prev = (_iter+1)*_blocksize; num_prev < (_iter+2)*_blocksize; num_prev++) {
00856       //
00857       // Initialize dense vector.
00858       //
00859       dense_vec.size(num_prev);
00860       //
00861       // Grab the next column of _basisvecs
00862       //
00863       index2[0] = num_prev;
00864       q_vec = MVT::CloneView( *_basisvecs, index2 );
00865       //
00866       // Grab all previous columns of _basisvecs
00867       //
00868       index.resize( num_prev );
00869       for (k=0; k<num_prev; k++) { index[k] = k; }
00870       Q_vec = MVT::CloneView( *_basisvecs, index ); 
00871       //
00872       // Do one step of classical Gram-Schmidt orthogonalization
00873       // with a 2nd correction step if needed.
00874       //
00875       bool dep = false;
00876       MVT::MvNorm( *q_vec, &nm1 );
00877       //
00878       // Compute trans(Q_vec)*q_vec
00879       //
00880       MVT::MvTransMv( one, *Q_vec, *q_vec, dense_vec );
00881       //
00882       // Sum results [0:num_prev-1] into column (num_prev-_blocksize)
00883       // of the Hessenberg matrix
00884       //
00885       for (k=0; k<num_prev; k++){
00886         _hessmatrix(k,num_prev-_blocksize) += dense_vec(k);
00887       }
00888       // Compute q_vec<- q_vec - Q_vec * dense_vec
00889       //
00890       MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *q_vec );
00891       //
00892       MVT::MvNorm( *q_vec, &nm2 );
00893       //
00894       if (nm2[0] < nm1[0] * _dep_tol) {
00895         // 
00896         // Repeat process with newly computed q_vec
00897         //
00898         // Compute trans(Q_vec)*q_vec
00899         //
00900         MVT::MvTransMv( one, *Q_vec, *q_vec, dense_vec );
00901         //
00902         // Sum results [0:num_prev-1] into column (num_prev-_blocksize)
00903         // of the Hessenberg matrix
00904         //
00905         for (k=0; k<num_prev; k++){
00906           _hessmatrix(k,num_prev-_blocksize) += dense_vec(k);
00907         }
00908         // Compute q_vec<- q_vec - Q_vec * dense_vec
00909         //
00910         MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *q_vec );
00911         //
00912         MVT::MvNorm( *q_vec, &nm2 );
00913       }
00914       //
00915       // Check for linear dependence
00916       //
00917       if (nm2[0] < nm1[0] * _sing_tol) {
00918         dep = true;
00919       }
00920       if (!dep){
00921         //
00922         // Normalize the new q_vec
00923         //
00924         ScalarType rjj = one/nm2[0];
00925         MVT::MvAddMv( rjj, *q_vec, zero, *q_vec, *q_vec );
00926         //
00927         // Enter norm of q_vec to the [(j+1)*_blocksize + iter] row
00928         // in the [(j*_blocksize + iter] column of the Hessenberg matrix
00929         // 
00930         _hessmatrix( num_prev, num_prev-_blocksize ) = nm2[0];
00931       }
00932       else { 
00933         //
00934         if (_om->isVerbosityAndPrint( OrthoDetails )) {
00935           *_os << "Column " << num_prev << " of _basisvecs is dependent" << endl;
00936           *_os << endl;
00937         }
00938         //
00939         // Create a random vector and orthogonalize it against all 
00940         // previous cols of _basisvecs
00941         // We could try adding a random unit vector instead -- not 
00942         // sure if this would make any difference.
00943         //
00944         MVT::MvRandom( *tptr );
00945         MVT::MvNorm( *tptr, &nm1 );
00946         //
00947         // This code  is automatically doing 2 steps of orthogonalization
00948         // after adding a random vector. We could do one step of
00949         // orthogonalization with a correction step if needed.
00950         //
00951         for (num_orth=0; num_orth<2; num_orth++)
00952         {
00953           MVT::MvTransMv(one, *Q_vec, *tptr, dense_vec);
00954           // Note that we don't change the entries of the
00955           // Hessenberg matrix when we orthogonalize a 
00956           // random vector
00957           MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *tptr );
00958         }
00959         //
00960         MVT::MvNorm( *tptr, &nm2 );
00961         //
00962         if (nm2[0] >= nm1[0] * _sing_tol){ 
00963           //
00964           // Copy vector into the current column of _basisvecs
00965           //
00966           MVT::MvAddMv( one, *tptr, zero, *tptr, *q_vec );
00967           MVT::MvNorm( *q_vec, &nm2 );
00968           //
00969           // Normalize the new q_vec
00970           //
00971           ScalarType rjj = one/nm2[0];
00972           MVT::MvAddMv( rjj, *q_vec, zero, *q_vec, *q_vec );
00973           //
00974           // Enter a zero in the [(j+1)*_blocksize + iter] row in the
00975           // [(j*_blocksize + iter] column of the Hessenberg matrix
00976           //
00977           _hessmatrix( num_prev, num_prev-_blocksize ) = zero;
00978         }
00979         else {
00980           //
00981           // Can't produce a new orthonormal basis vector
00982           // Return a flag so we can exit this pass of block GMRES
00983           flg = true;
00984           return flg;
00985         }
00986         //
00987       } // end else 
00988       //
00989     } // end for (iter=0;...)
00990       //
00991     if (_om->isVerbosity( OrthoDetails )){
00992       if(_om->doPrint()) { *_os << endl;
00993       *_os << "Checking Orthogonality after BlkOrthSing()"
00994            << " Iteration: " << _iter << endl; }
00995       CheckKrylovOrth(_iter);
00996     }
00997     //
00998     return flg;
00999     //
01000   } // end BlkOrthSing()
01001   
01002   template<class ScalarType, class MV, class OP>
01003   bool BlockGmres<ScalarType,MV,OP>::QRFactorAug(MV& VecIn, 
01004              Teuchos::SerialDenseMatrix<int,ScalarType>& R, 
01005              bool blkone) 
01006   {
01007     int i,j,k;
01008     int nb = MVT::GetNumberVecs( VecIn ); 
01009     const int IntOne = 1;
01010     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01011     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
01012     bool addvec = false;
01013     bool flg = false;
01014     //
01015     std::vector<int> index, index2( IntOne );
01016     std::vector<MagnitudeType> norm1( IntOne );
01017     std::vector<MagnitudeType> norm2( IntOne );
01018     Teuchos::SerialDenseVector<int,ScalarType> rj; 
01019     RefCountPtr<const MV> Qj;
01020     RefCountPtr<MV> qj, tptr;
01021     tptr = MVT::Clone(*_basisvecs, IntOne); 
01022     //
01023     // Zero out the array that will contain the Fourier coefficients.
01024     //
01025     for ( j=0; j<nb; j++ ) {
01026       for ( i=0; i<nb; i++ ) {
01027   R(i,j) = zero;
01028       }
01029     }
01030     //
01031     // Start the loop to orthogonalize the nb columns of VecIn.
01032     //
01033     for ( j=0; j<nb; j++ ) {
01034       //
01035       flg = false;
01036       //
01037       // Grab the j-th column of VecIn (the first column is indexed to 
01038       // be the zero-th one).
01039       //
01040       index2[0] = j;
01041       qj = MVT::CloneView( VecIn, index2 );
01042       //
01043       // If we are beyond the 1st column, orthogonalize against the previous
01044       // vectors in the current block
01045       //
01046       if ( j ) {
01047   //
01048   // Grab the first j columns of VecIn (that are now an orthogonal
01049   // basis for first j columns of the entering VecIn).
01050   //
01051   rj.size(j);
01052   index.resize(j);
01053   for (i=0; i<j; i++) { index[i] = i; } 
01054   Qj = MVT::CloneView( VecIn, index );
01055   MVT::MvNorm( *qj, &norm1 );
01056   //
01057   // Do one step of classical Gram-Schmidt orthogonalization
01058   // with a second correction step if needed
01059   //
01060   // Determine the Fouier coefficients for orthogonalizing column
01061   // j of VecIn against columns 0:j-1 of VecIn. In other words,
01062   // result = trans(Qj)*qj.
01063   //
01064   MVT::MvTransMv( one, *Qj, *qj, rj );
01065   //
01066   // Sum results[0:j-1] into column j of R.
01067   //
01068   for ( k=0; k<j; k++ ) {
01069     R(k,j) += rj(k);
01070   }
01071   //
01072   // Compute qj <- qj - Qj * rj.
01073   //
01074   MVT::MvTimesMatAddMv( -one, *Qj, rj, one, *qj );
01075   //
01076   MVT::MvNorm( *qj, &norm2 );
01077   //
01078   if (norm2[0] < norm1[0] * _dep_tol){
01079     //
01080     // Repeat process with newly computed qj
01081     //
01082     MVT::MvTransMv( one, *Qj, *qj, rj );
01083     //
01084     // Sum results[0:j-1] into column j of R.
01085     //
01086     for ( k=0; k<j; k++ ) {
01087       R(k,j) += rj(k);
01088     }
01089     //
01090     // Compute qj <- qj - Qj * rj.
01091     //
01092     MVT::MvTimesMatAddMv( -one, *Qj, rj, one, *qj );
01093     //
01094     MVT::MvNorm( *qj, &norm2 );
01095   }
01096   //
01097   // Check for dependencies
01098   //
01099   if (!blkone) {
01100     // This is not the 1st block. A looser tolerance is used to 
01101     // determine dependencies. If a dependency is detected, a flag
01102     // is set so we can back out this routine and out of BlkOrth. 
01103     // The routine BlkOrthSing is used to construct the new block 
01104     // of orthonormal basis vectors one at a time. If a dependency
01105     // is detected within this routine, a random vector is added 
01106     // and orthogonalized against all previous basis vectors.
01107     // 
01108     //
01109     if (norm2[0] < norm1[0] * _blk_tol) {
01110       if (_om->isVerbosityAndPrint( OrthoDetails )) {
01111         *_os << "Column " << j << " of current block is dependent" << endl;
01112       }
01113       flg = true;  
01114       return flg;
01115     }
01116   }
01117   else {
01118     // This is the 1st block of basis vectors.
01119     // Use a tighter tolerance to determine dependencies, because
01120     // if a dependency is detected we will be adding a random
01121     // vector and orthogonalizing it against previous vectors
01122     // in the 1st block
01123     //
01124     if (norm2[0] < norm1[0] * _sing_tol) {
01125       // The 1st block of vectors are dependent
01126       // Add a random vector and orthogonalize it against
01127       // previous vectors in block.
01128       //
01129       addvec = true;
01130       Teuchos::SerialDenseVector<int,ScalarType> tj(j);
01131       //
01132       MVT::MvRandom( *tptr );
01133       MVT::MvNorm( *tptr, &norm1 );
01134       //
01135       int num_orth;
01136       for (num_orth=0; num_orth<2; num_orth++){
01137         MVT::MvTransMv( one, *Qj, *tptr, tj );
01138         MVT::MvTimesMatAddMv( -one, *Qj, tj, one, *tptr );
01139       }
01140       MVT::MvNorm( *tptr, &norm2 ); 
01141       //
01142       if (norm2[0] >= norm1[0] * _sing_tol){
01143         // Copy vector into current column of _basisvecs
01144         MVT::MvAddMv( one, *tptr, zero, *tptr, *qj );
01145       }
01146       else {
01147         flg = true;
01148         return flg;
01149       } 
01150     } 
01151   } // end else
01152       } // end if (j)
01153       //
01154       // If we have not exited, compute the norm of column j of
01155       // VecIn (qj), then normalize qj to make it into a unit vector
01156       //
01157       std::vector<MagnitudeType> normq(IntOne);
01158       MVT::MvNorm( *qj, &normq );
01159       //
01160       ScalarType rjj = one / normq[0];
01161       MVT::MvAddMv ( rjj, *qj, zero, *qj, *qj );
01162       //
01163       if (addvec){
01164   // We've added a random vector, so
01165   // enter a zero in j'th diagonal element of R
01166   R(j,j) = zero;
01167       }
01168       else {
01169   R(j,j) = normq[0];
01170       }
01171       //
01172     } // end for (j=0; j<nb; j++)
01173       //
01174     return flg;
01175     //
01176   } // end QRFactorAug()
01177   
01178   template<class ScalarType, class MV, class OP>
01179   void 
01180   BlockGmres<ScalarType,MV,OP>::UpdateLSQR( Teuchos::SerialDenseMatrix<int,ScalarType>& R, 
01181               Teuchos::SerialDenseMatrix<int,ScalarType>& z )
01182   {
01183     int i, j, maxidx;
01184     ScalarType sigma, mu, vscale, maxelem;
01185     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01186 
01187     Teuchos::LAPACK<int, ScalarType> lapack;
01188     Teuchos::BLAS<int, ScalarType> blas;
01189     //
01190     // Apply previous transformations and compute new transformation to reduce upper-Hessenberg
01191     // system to upper-triangular form.  
01192     // NOTE:  When _iter==0, storage will be created for the transformations.
01193     //
01194     if (_blocksize == 1) 
01195       {
01196   if (_iter==0) {
01197     cs.resize( _length+1 );
01198     sn.resize( _length+1 );
01199   }
01200   //
01201   // QR factorization of Least-Squares system with Givens rotations
01202   //
01203   for (i=0; i<_iter; i++) {
01204     //
01205     // Apply previous Givens rotations to new column of Hessenberg matrix
01206     //
01207     blas.ROT( 1, &R(i,_iter), 1, &R(i+1, _iter), 1, &cs[i], &sn[i] );
01208   }
01209   //
01210   // Calculate new Givens rotation
01211   //
01212   blas.ROTG( &R(_iter,_iter), &R(_iter+1,_iter), &cs[_iter], &sn[_iter] );
01213   R(_iter+1,_iter) = zero;
01214   //
01215   // Update RHS w/ new transformation
01216   //
01217   blas.ROT( 1, &z(_iter,0), 1, &z(_iter+1,0), 1, &cs[_iter], &sn[_iter] );
01218       } 
01219     else
01220       {
01221   if (_iter==0) {
01222     beta.size((_length+1)*_blocksize);
01223   }
01224   //
01225   // QR factorization of Least-Squares system with Householder reflectors
01226   //
01227   for (j=0; j<_blocksize; j++) {
01228     //
01229     // Apply previous Householder reflectors to new block of Hessenberg matrix
01230     //
01231     for (i=0; i<_iter*_blocksize+j; i++) {
01232       sigma = blas.DOT( _blocksize, &R(i+1,i), 1, &R(i+1,_iter*_blocksize+j), 1);
01233       sigma += R(i,_iter*_blocksize+j);
01234       sigma *= beta[i];
01235       blas.AXPY(_blocksize, -sigma, &R(i+1,i), 1, &R(i+1,_iter*_blocksize+j), 1);
01236       R(i,_iter*_blocksize+j) -= sigma;
01237     }
01238     //
01239     // Compute new Householder reflector
01240     //
01241     maxidx = blas.IAMAX( _blocksize+1, &R(_iter*_blocksize+j,_iter*_blocksize+j), 1 );
01242     maxelem = R(_iter*_blocksize+j+maxidx-1,_iter*_blocksize+j);
01243     for (i=0; i<_blocksize+1; i++) 
01244       R(_iter*_blocksize+j+i,_iter*_blocksize+j) /= maxelem;
01245     sigma = blas.DOT( _blocksize, &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 1, 
01246           &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 1 );
01247     if (sigma == zero) {
01248       beta[_iter*_blocksize + j] = zero;
01249     } else {
01250       mu = sqrt(R(_iter*_blocksize+j,_iter*_blocksize+j)*R(_iter*_blocksize+j,_iter*_blocksize+j)+sigma);
01251       if ( Teuchos::ScalarTraits<ScalarType>::real(R(_iter*_blocksize+j,_iter*_blocksize+j)) < Teuchos::ScalarTraits<MagnitudeType>::zero() ) {
01252         vscale = R(_iter*_blocksize+j,_iter*_blocksize+j) - mu;
01253       } else {
01254         vscale = -sigma / (R(_iter*_blocksize+j,_iter*_blocksize+j) + mu);
01255       }
01256       beta[_iter*_blocksize+j] = 2.0*vscale*vscale/(sigma + vscale*vscale);
01257       R(_iter*_blocksize+j,_iter*_blocksize+j) = maxelem*mu;
01258       for (i=0; i<_blocksize; i++)
01259         R(_iter*_blocksize+j+1+i,_iter*_blocksize+j) /= vscale;
01260     }
01261     //
01262     // Apply new Householder reflector to rhs
01263     //
01264     for (i=0; i<_blocksize; i++) {
01265       sigma = blas.DOT( _blocksize, &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 
01266             1, &z(_iter*_blocksize+j+1,i), 1);
01267       sigma += z(_iter*_blocksize+j,i);
01268       sigma *= beta[_iter*_blocksize+j];
01269       blas.AXPY(_blocksize, -sigma, &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 
01270           1, &z(_iter*_blocksize+j+1,i), 1);
01271       z(_iter*_blocksize+j,i) -= sigma;
01272     }
01273   }
01274       } // end if (_blocksize == 1)
01275   } // end UpdateLSQR
01276   
01277 
01278   template<class ScalarType, class MV, class OP>
01279   void BlockGmres<ScalarType,MV,OP>::CheckKrylovOrth( const int j ) 
01280   {
01281     int i,k,m=(j+1)*_blocksize;
01282     const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
01283     const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01284     Teuchos::SerialDenseMatrix<int,ScalarType> VTV; VTV.shape(m,m);
01285     std::vector<int> index(_blocksize);
01286     std::vector<MagnitudeType> ptr_norms(_blocksize);
01287     
01288     for ( i=0; i<_blocksize; i++ ) {
01289       index[i] = m+i;
01290     }
01291     RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
01292     
01293     ScalarType sum = zero;    
01294     MVT::MvNorm( *F_vec, &ptr_norms );
01295     for ( i=0; i<_blocksize; i++ ) {
01296       sum += ptr_norms[i];
01297     }
01298     
01299     index.resize( m );
01300     for ( i=0; i<m; i++ ) { index[i] = i; }
01301     RefCountPtr<MV> Vj = MVT::CloneView( *_basisvecs, index );
01302     MVT::MvTransMv(one, *Vj, *Vj, VTV);
01303     ScalarType column_sum;
01304     //
01305     *_os << " " <<  endl;
01306     *_os << "********Block Arnoldi iteration******** " << j <<  endl;
01307     *_os << " " <<  endl;
01308     //
01309     for (k=0; k<m; k++) {
01310       column_sum = zero;
01311       for (i=0; i<m; i++) {
01312   if (i==k) {
01313     VTV(i,i) -= one;
01314   }
01315   column_sum += VTV(i,k);
01316       }
01317       *_os <<  " V^T*V-I " << "for column " << k << " is " 
01318     << Teuchos::ScalarTraits<ScalarType>::magnitude(column_sum) <<  endl;
01319     }
01320     *_os << " " <<  endl;
01321     
01322     Teuchos::SerialDenseMatrix<int,ScalarType> E; E.shape(m,_blocksize);
01323     
01324     MVT::MvTransMv(one, *Vj, *F_vec, E);
01325     
01326     for (k=0;k<_blocksize;k++) {
01327       column_sum = zero;
01328       for (i=0; i<m; i++) {
01329   column_sum += E(i,k);
01330       }
01331       if (ptr_norms[k]) column_sum = column_sum/ptr_norms[k];
01332       *_os << " Orthogonality with F " << "for column " << k << " is " 
01333     << Teuchos::ScalarTraits<ScalarType>::magnitude(column_sum) <<  endl;
01334     }
01335     *_os << " " <<  endl;
01336     //
01337     //
01338   } // end CheckKrylovOrth
01339     
01340 
01341   // Overridden from Teuchos::Describable
01342 
01343   template<class ScalarType, class MV, class OP>
01344   std::string BlockGmres<ScalarType,MV,OP>::description() const
01345   {
01346     std::ostringstream oss;
01347     oss << "Belos::BlockGmres<...,"<<Teuchos::ScalarTraits<ScalarType>::name()<<">";
01348     oss << "{";
01349     oss << "Variant=\'"<<(_flexible?"Flexible":"Standard")<<"\'";
01350     oss << "}";
01351     return oss.str();
01352   }
01353 
01354 } // end namespace Belos
01355 
01356 #endif
01357 // End of file BelosBlockGmres.hpp
01358 
01359 

Generated on Thu Sep 18 12:30:12 2008 for Belos by doxygen 1.3.9.1