00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #ifndef BELOS_BLOCK_GMRES_HPP
00045 #define BELOS_BLOCK_GMRES_HPP
00046
00053 #include "BelosConfigDefs.hpp"
00054 #include "BelosIterativeSolver.hpp"
00055 #include "BelosLinearProblem.hpp"
00056 #include "BelosOutputManager.hpp"
00057 #include "BelosStatusTest.hpp"
00058 #include "BelosOperatorTraits.hpp"
00059 #include "BelosMultiVecTraits.hpp"
00060
00061 #include "Teuchos_BLAS.hpp"
00062 #include "Teuchos_LAPACK.hpp"
00063 #include "Teuchos_SerialDenseMatrix.hpp"
00064 #include "Teuchos_SerialDenseVector.hpp"
00065 #include "Teuchos_ScalarTraits.hpp"
00066 #include "Teuchos_ParameterList.hpp"
00067 #include "Teuchos_TimeMonitor.hpp"
00068
00069 using Teuchos::ParameterList;
00070 using Teuchos::RefCountPtr;
00071
00083 namespace Belos {
00084
00085 template <class ScalarType, class MV, class OP>
00086 class BlockGmres : public IterativeSolver<ScalarType,MV,OP> {
00087 public:
00088
00089
00090
00091
00092 typedef MultiVecTraits<ScalarType,MV> MVT;
00093 typedef OperatorTraits<ScalarType,MV,OP> OPT;
00094 typedef Teuchos::ScalarTraits<ScalarType> SCT;
00095 typedef typename SCT::magnitudeType MagnitudeType;
00096
00099
00100 BlockGmres(const RefCountPtr<LinearProblem<ScalarType,MV,OP> > &lp,
00101 const RefCountPtr<StatusTest<ScalarType,MV,OP> > &stest,
00102 const RefCountPtr<OutputManager<ScalarType> > &om,
00103 const RefCountPtr<ParameterList> &pl
00104 );
00105
00107 virtual ~BlockGmres() {};
00109
00112
00114 int GetNumIters() const { return( _totaliter ); }
00115
00117 int GetNumRestarts() const { return( _restartiter ); }
00118
00120
00123 RefCountPtr<const MV> GetNativeResiduals( std::vector<MagnitudeType> *normvec ) const;
00124
00126
00131 RefCountPtr<MV> GetCurrentSoln();
00132
00134
00136 RefCountPtr<LinearProblem<ScalarType,MV,OP> > GetLinearProblem() const { return( _lp ); }
00137
00139 RefCountPtr<StatusTest<ScalarType,MV,OP> > GetStatusTest() const { return( _stest ); }
00140
00142 int Reset( const RefCountPtr<ParameterList>& pl = null );
00143
00145
00148
00153 void Solve();
00154
00156
00159
00161 std::string description() const;
00162
00164
00165 private:
00166
00168 void SetGmresBlkTols();
00169
00171 bool BlockReduction(bool&);
00172
00174 bool QRFactorAug(MV&,
00175 Teuchos::SerialDenseMatrix<int,ScalarType>&,
00176 bool);
00177
00179 bool BlkOrth(MV&);
00180
00182 bool BlkOrthSing(MV&);
00183
00185 void UpdateLSQR(Teuchos::SerialDenseMatrix<int,ScalarType>&,Teuchos::SerialDenseMatrix<int,ScalarType>&);
00186
00188 void CheckKrylovOrth(const int);
00189
00191 RefCountPtr<LinearProblem<ScalarType,MV,OP> > _lp;
00192
00194 RefCountPtr<StatusTest<ScalarType,MV,OP> > _stest;
00195
00197 RefCountPtr<OutputManager<ScalarType> > _om;
00198
00200 RefCountPtr<ParameterList> _pl;
00201
00203 RefCountPtr<MV> _basisvecs;
00204
00206 RefCountPtr<MV> _z_basisvecs;
00207
00209 RefCountPtr<MV> _cur_block_rhs, _cur_block_sol;
00210
00212 Teuchos::SerialDenseMatrix<int,ScalarType> _hessmatrix;
00213
00215 Teuchos::SerialDenseMatrix<int,ScalarType> _z;
00216
00218 RefCountPtr<ostream> _os;
00219
00221 int _length;
00222
00224 int _blocksize, _restartiter, _totaliter, _iter;
00225
00227 MagnitudeType _dep_tol, _blk_tol, _sing_tol;
00228
00230 bool _flexible;
00231
00233 Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
00234 Teuchos::SerialDenseVector<int,MagnitudeType> cs;
00235
00237 bool _restartTimers;
00238
00240 Teuchos::RefCountPtr<Teuchos::Time> _timerOp, _timerPrec,
00241 _timerOrtho, _timerTotal;
00242 };
00243
00244
00245
00246
00247
00248
00249
00250
00251 template <class ScalarType, class MV, class OP>
00252 BlockGmres<ScalarType,MV,OP>::BlockGmres(const RefCountPtr< LinearProblem<ScalarType,MV,OP> >& lp,
00253 const RefCountPtr< StatusTest<ScalarType,MV,OP> >& stest,
00254 const RefCountPtr< OutputManager<ScalarType> >& om,
00255 const RefCountPtr< ParameterList > &pl ):
00256 _lp(lp),
00257 _stest(stest),
00258 _om(om),
00259 _pl(pl),
00260 _os(om->GetOStream()),
00261 _length(_pl->get("Length",25)),
00262 _blocksize(0),
00263 _restartiter(0),
00264 _totaliter(0),
00265 _iter(0),
00266 _dep_tol(1.0),
00267 _blk_tol(1.0),
00268 _sing_tol(1.0),
00269 _flexible( (_pl->isParameter("Variant"))&&(Teuchos::getParameter<std::string>(*_pl, "Variant")=="Flexible") ),
00270 _restartTimers(true),
00271 _timerOp(Teuchos::TimeMonitor::getNewTimer("Operation Op*x")),
00272 _timerPrec(Teuchos::TimeMonitor::getNewTimer("Operation Prec*x")),
00273 _timerOrtho(Teuchos::TimeMonitor::getNewTimer("Orthogonalization")),
00274 _timerTotal(Teuchos::TimeMonitor::getNewTimer("Total time"))
00275 {
00276
00277
00278
00279 SetGmresBlkTols();
00280 }
00281
00282 template <class ScalarType, class MV, class OP>
00283 void
00284 BlockGmres<ScalarType,MV,OP>::SetGmresBlkTols()
00285 {
00286 typedef typename Teuchos::ScalarTraits<MagnitudeType> MGT;
00287 const MagnitudeType two = 2.0;
00288 const MagnitudeType eps = SCT::eps();
00289 _dep_tol = MGT::one()/MGT::squareroot(two);
00290 _blk_tol = 10*sqrt(eps);
00291 _sing_tol = 10 * eps;
00292 }
00293
00294 template <class ScalarType, class MV, class OP>
00295 RefCountPtr<const MV>
00296 BlockGmres<ScalarType,MV,OP>::GetNativeResiduals( std::vector<MagnitudeType> *normvec ) const
00297 {
00298
00299
00300
00301
00302
00303 if ( normvec && (int)normvec->size() < _blocksize )
00304 normvec->resize( _blocksize );
00305
00306 if (_totaliter == 0) {
00307 RefCountPtr<MV> temp_res = MVT::Clone( *_cur_block_rhs, _blocksize );
00308 _lp->ComputeResVec( &*temp_res, &*_cur_block_sol, &*_cur_block_rhs);
00309 MVT::MvNorm( *temp_res, normvec );
00310 return temp_res;
00311 } else {
00312 if (normvec) {
00313 Teuchos::BLAS<int,ScalarType> blas;
00314 for (int j=0; j<_blocksize; j++)
00315 (*normvec)[j] = blas.NRM2( _blocksize, &_z(_iter*_blocksize, j ), 1);
00316 }
00317 }
00318 return null;
00319 }
00320
00321 template <class ScalarType, class MV, class OP>
00322 RefCountPtr<MV>
00323 BlockGmres<ScalarType,MV,OP>::GetCurrentSoln()
00324 {
00325
00326
00327
00328
00329
00330 RefCountPtr<MV> cur_sol_copy = MVT::CloneCopy(*_cur_block_sol);
00331 if (_iter==0) {
00332 return cur_sol_copy;
00333 } else {
00334 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00335 int i, m = _iter*_blocksize;
00336 Teuchos::BLAS<int,ScalarType> blas;
00337
00338
00339
00340 Teuchos::SerialDenseMatrix<int,ScalarType> y( Teuchos::Copy, _z, m, _blocksize );
00341
00342
00343
00344 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
00345 Teuchos::NON_UNIT_DIAG, m, _blocksize, one,
00346 _hessmatrix.values(), _hessmatrix.stride(), y.values(), y.stride() );
00347
00348 std::vector<int> index(m);
00349 for ( i=0; i<m; i++ ) {
00350 index[i] = i;
00351 }
00352 if (_flexible) {
00353 RefCountPtr<const MV> Zjp1 = MVT::CloneView( *_z_basisvecs, index );
00354 MVT::MvTimesMatAddMv( one, *Zjp1, y, one, *cur_sol_copy );
00355 }
00356 else {
00357 RefCountPtr<const MV> Vjp1 = MVT::CloneView( *_basisvecs, index );
00358 MVT::MvTimesMatAddMv( one, *Vjp1, y, one, *cur_sol_copy );
00359 }
00360 }
00361 return cur_sol_copy;
00362 }
00363
00364 template <class ScalarType, class MV, class OP>
00365 void
00366 BlockGmres<ScalarType,MV,OP>::Solve()
00367 {
00368 Teuchos::TimeMonitor LocalTimer(*_timerTotal,_restartTimers);
00369
00370 if ( _restartTimers ) {
00371 _timerOp->reset();
00372 _timerPrec->reset();
00373 _timerOrtho->reset();
00374 }
00375
00376 int i=0;
00377 std::vector<int> index;
00378 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00379 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00380 Teuchos::RefCountPtr<MV> U_vec;
00381 bool dep_flg = false, ortho_flg = false;
00382 bool restart_flg = true;
00383 Teuchos::LAPACK<int, ScalarType> lapack;
00384 Teuchos::BLAS<int, ScalarType> blas;
00385
00386
00387
00388 _os = _om->GetOStream();
00389
00390
00391
00392 _cur_block_sol = _lp->GetCurrLHSVec();
00393 _cur_block_rhs = _lp->GetCurrRHSVec();
00394
00395
00396
00397 while ( _cur_block_rhs.get() && _cur_block_sol.get() ) {
00398
00399
00400
00401 _blocksize = _lp->GetCurrBlockSize();
00402
00403 if (_om->isVerbosityAndPrint( IterationDetails )) {
00404 *_os << endl;
00405 *_os << "===================================================" << endl;
00406 *_os << "Solving linear system(s): "
00407 << _lp->GetRHSIndex() << " through " << _lp->GetRHSIndex()+_lp->GetNumToSolve()-1
00408 << " [block size = " << _blocksize << "]\n";
00409 *_os << endl;
00410 }
00411
00412
00413
00414 _totaliter = 0;
00415
00416
00417
00418 _basisvecs = MVT::Clone(*_cur_block_rhs,(_length+1)*_blocksize);
00419 if (_flexible)
00420 _z_basisvecs = MVT::Clone(*_cur_block_rhs, (_length+1)*_blocksize);
00421
00422
00423
00424 _hessmatrix.shape((_length+1)*_blocksize, _length*_blocksize);
00425 _z.shape((_length+1)*_blocksize, _blocksize);
00426
00427
00428 for ( _restartiter=0; _stest->CheckStatus(this) == Unconverged && restart_flg; ++_restartiter ) {
00429
00430
00431
00432
00433 index.resize(_blocksize);
00434 for (i=0; i < _blocksize; i++) { index[i] = i; }
00435 U_vec = MVT::CloneView( *_basisvecs, index );
00436
00437
00438
00439 _lp->ComputeResVec( &*U_vec, &*_cur_block_sol, &*_cur_block_rhs );
00440
00441
00442
00443 dep_flg = false; ortho_flg = false;
00444
00445
00446
00447 _z.putScalar();
00448 Teuchos::SerialDenseMatrix<int,ScalarType> G10(Teuchos::View, _z, _blocksize, _blocksize);
00449 ortho_flg = QRFactorAug( *U_vec, G10, true );
00450
00451 if (ortho_flg){
00452 if (_om->isVerbosityAndPrint( Errors )){
00453 *_os << "Exiting Block GMRES" << endl;
00454 *_os << " Restart iteration# " << _restartiter
00455 << " Iteration# " << _iter << endl;
00456 *_os << " ERROR: Failed to compute initial block of orthonormal basis vectors"
00457 << endl << endl;
00458 }
00459 }
00460
00461
00462
00463
00464
00465 if (U_vec.get()) {U_vec == null;}
00466
00467
00468 for (_iter=0; _iter<_length && _stest->CheckStatus(this) == Unconverged && !ortho_flg; ++_iter, ++_totaliter) {
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478 ortho_flg = BlockReduction(dep_flg);
00479 if (ortho_flg && _blocksize > 1){
00480 break;
00481 }
00482
00483
00484
00485
00486 UpdateLSQR( _hessmatrix, _z );
00487
00488 }
00489
00490
00491
00492 if (_iter) {
00493
00494
00495 Teuchos::SerialDenseMatrix<int,ScalarType> _z_copy( Teuchos::Copy,_z, _iter*_blocksize, _blocksize );
00496
00497 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
00498 Teuchos::NON_UNIT_DIAG, _iter*_blocksize, _blocksize, one,
00499 _hessmatrix.values(), _hessmatrix.stride(), _z_copy.values(), _z_copy.stride() );
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509 index.resize( _iter*_blocksize );
00510 for (i=0; i < _iter*_blocksize; i++) { index[i] = i; }
00511 if (_flexible) {
00512 RefCountPtr<const MV> Zjp1 = MVT::CloneView( *_z_basisvecs, index );
00513 MVT::MvTimesMatAddMv( one, *Zjp1, _z_copy, one, *_cur_block_sol );
00514 _lp->SolutionUpdated();
00515 } else {
00516 RefCountPtr<const MV> Vjp1 = MVT::CloneView( *_basisvecs, index );
00517 RefCountPtr<MV> solnUpdate = MVT::Clone( *_cur_block_sol, _blocksize );
00518 MVT::MvTimesMatAddMv( one, *Vjp1, _z_copy, zero, *solnUpdate );
00519
00520
00521
00522 _lp->SolutionUpdated( solnUpdate.get() );
00523
00524 }
00525 }
00526
00527
00528
00529 if (_iter == _length) {
00530 _stest->CheckStatus(this);
00531 }
00532
00533
00534
00535
00536 restart_flg = (_iter!=0 && restart_flg);
00537 if (ortho_flg && restart_flg) {
00538 if (_om->isVerbosityAndPrint( Warnings )) {
00539 *_os << "WARNING: Orthogonalization failure detected at local iteration "
00540 << _iter<<", total iteration "<<_totaliter<<", restart will be performed!\n";
00541 }
00542 }
00543
00544
00545
00546 if ( _stest->GetStatus() != Unconverged || !restart_flg ) { break; }
00547
00548 }
00549
00550
00551
00552 if (_om->isVerbosityAndPrint( FinalSummary )) {
00553 *_os << endl;
00554 _stest->Print(*_os);
00555 if (ortho_flg && _stest->GetStatus()!=Converged) {
00556 *_os << " Exiting Block GMRES --- " << endl;
00557 *_os << " ERROR: Failed to compute new block of orthonormal basis vectors" << endl;
00558 *_os << " ***Solution from previous step will be returned***"<< endl<< endl;
00559 }
00560 }
00561
00562
00563
00564 _lp->SetCurrLSVec();
00565
00566
00567
00568 _cur_block_sol = _lp->GetCurrLHSVec();
00569 _cur_block_rhs = _lp->GetCurrRHSVec();
00570
00571 }
00572
00573
00574
00575
00576 _timerTotal->stop();
00577
00578
00579 Teuchos::TimeMonitor::format().setPageWidth(54);
00580
00581 if (_om->isVerbosity( Belos::TimingDetails )) {
00582 if (_om->doPrint())
00583 *_os <<"********************TIMING DETAILS********************"<<endl;
00584 Teuchos::TimeMonitor::summarize( *_os );
00585 if (_om->doPrint())
00586 *_os <<"******************************************************"<<endl;
00587 }
00588 }
00589
00590
00591 template<class ScalarType, class MV, class OP>
00592 int
00593 BlockGmres<ScalarType,MV,OP>::Reset( const RefCountPtr<ParameterList>& pl )
00594 {
00595
00596 if (pl.get() != 0 )
00597 _pl = pl;
00598 _length = _pl->get("Length",25);
00599 _blocksize = 0;
00600 _restartiter = 0;
00601 _totaliter = 0;
00602 _iter = 0;
00603
00604
00605
00606 if (_pl->isParameter("Variant"))
00607 _flexible = (Teuchos::getParameter<std::string>(*_pl, "Variant")=="Flexible");
00608 return 0;
00609 }
00610
00611 template<class ScalarType, class MV, class OP>
00612 bool
00613 BlockGmres<ScalarType,MV,OP>::BlockReduction ( bool& dep_flg )
00614 {
00615
00616 int i;
00617 std::vector<int> index( _blocksize );
00618 RefCountPtr<MV> AU_vec = MVT::Clone( *_basisvecs,_blocksize );
00619
00620
00621
00622 for ( i=0; i<_blocksize; i++ ) {
00623 index[i] = _iter*_blocksize+i;
00624 }
00625 RefCountPtr<MV> U_vec = MVT::CloneView( *_basisvecs, index );
00626
00627
00628
00629 if (_flexible) {
00630
00631 RefCountPtr<MV> Z_vec = MVT::CloneView( *_z_basisvecs, index );
00632
00633
00634
00635 {
00636 Teuchos::TimeMonitor PrecTimer(*_timerPrec);
00637 _lp->ApplyRightPrec( *U_vec, *Z_vec );
00638 }
00639
00640
00641
00642 {
00643 Teuchos::TimeMonitor OpTimer(*_timerOp);
00644 _lp->ApplyOp( *Z_vec, *AU_vec );
00645 }
00646 }
00647 else
00648 {
00649 Teuchos::TimeMonitor OpTimer(*_timerOp);
00650 _lp->Apply( *U_vec, *AU_vec );
00651 }
00652
00653
00654
00655 bool dep = false;
00656 if (!dep_flg){
00657 Teuchos::TimeMonitor OrthoTimer(*_timerOrtho);
00658 dep = BlkOrth(*AU_vec);
00659 if (dep) {
00660 dep_flg = true;
00661 if (_blocksize == 1) {
00662 return dep_flg;
00663 }
00664 }
00665 }
00666
00667
00668
00669
00670
00671 bool flg = false;
00672 if (dep_flg){
00673 Teuchos::TimeMonitor OrthoTimer(*_timerOrtho);
00674 flg = BlkOrthSing(*AU_vec);
00675 }
00676
00677 return flg;
00678
00679 }
00680
00681
00682 template<class ScalarType, class MV, class OP>
00683 bool BlockGmres<ScalarType,MV,OP>::BlkOrth( MV& VecIn )
00684 {
00685
00686
00687
00688
00689
00690 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00691 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00692 const int max_num_orth = 2;
00693 int i, k, row_offset, col_offset;
00694 std::vector<int> index( _blocksize );
00695 std::vector<MagnitudeType> norm1( _blocksize );
00696 std::vector<MagnitudeType> norm2( _blocksize );
00697
00698
00699
00700 for (i=0; i<_blocksize; i++) { index[i] = (_iter+1)*_blocksize + i; }
00701
00702
00703
00704 RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
00705
00706
00707
00708 MVT::MvAddMv( one, VecIn, zero, VecIn, *F_vec );
00709
00710
00711
00712
00713
00714 int n_row = _hessmatrix.numRows();
00715
00716 for ( k=0; k<_blocksize; k++ ) {
00717 for ( i=0; i<n_row ; i++ ) {
00718 _hessmatrix(i,_iter*_blocksize+k) = zero;
00719 }
00720 }
00721
00722
00723
00724 int num_prev = (_iter+1)*_blocksize;
00725 index.resize( num_prev );
00726 for (i=0; i<num_prev; i++) { index[i] = i; }
00727 RefCountPtr<MV> V_prev = MVT::CloneView( *_basisvecs, index );
00728
00729
00730
00731 Teuchos::SerialDenseMatrix<int,ScalarType> dense_mat( num_prev, _blocksize );
00732
00733 MVT::MvNorm(*F_vec, &norm1);
00734
00735
00736
00737
00738 for ( int num_orth=0; num_orth<max_num_orth; num_orth++ ) {
00739
00740
00741
00742
00743 MVT::MvTransMv (one, *V_prev, *F_vec, dense_mat);
00744
00745
00746
00747
00748 for ( k=0; k<_blocksize; k++ ) {
00749 for ( i=0; i<num_prev; i++ ) {
00750 _hessmatrix(i,_iter*_blocksize+k) += dense_mat(i,k);
00751 }
00752 }
00753
00754
00755
00756 MVT::MvTimesMatAddMv( -one, *V_prev, dense_mat, one, *F_vec );
00757
00758 }
00759
00760 MVT::MvNorm( *F_vec, &norm2 );
00761
00762
00763
00764
00765 bool flg = false;
00766
00767 for (i=0; i<_blocksize; i++){
00768 if (norm2[i] < norm1[i] * _blk_tol) {
00769 flg = true;
00770 if (_om->isVerbosityAndPrint( OrthoDetails )){
00771 *_os << "Col " << num_prev+i << " is dependent on previous "
00772 << "Arnoldi vectors in V_prev" << endl;
00773 *_os << endl;
00774 }
00775 }
00776 }
00777
00778 if (_om->isVerbosity( OrthoDetails )) {
00779 if(_om->doPrint()) { *_os << "Checking Orthogonality after BlkOrth()"
00780 << " Iteration: " << _iter << endl; }
00781 CheckKrylovOrth(_iter);
00782 }
00783
00784
00785
00786
00787
00788
00789 if (!flg) {
00790
00791
00792
00793 row_offset = (_iter+1)*_blocksize; col_offset = _iter*_blocksize;
00794 Teuchos::SerialDenseMatrix<int,ScalarType> sub_block_hess(Teuchos::View, _hessmatrix, _blocksize, _blocksize,
00795 row_offset, col_offset);
00796 flg = QRFactorAug( *F_vec, sub_block_hess, false );
00797 }
00798
00799 return flg;
00800
00801 }
00802
00803
00804 template<class ScalarType, class MV, class OP>
00805 bool BlockGmres<ScalarType,MV,OP>::BlkOrthSing( MV& VecIn )
00806 {
00807
00808
00809
00810
00811
00812
00813
00814 const int IntOne = 1;
00815 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00816 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00817 Teuchos::SerialDenseVector<int,ScalarType> dense_vec;
00818 int i, k, num_orth;
00819 std::vector<int> index( _blocksize );
00820 std::vector<int> index2( IntOne );
00821 std::vector<MagnitudeType> nm1(IntOne);
00822 std::vector<MagnitudeType> nm2(IntOne);
00823
00824
00825
00826 for ( i=0; i<_blocksize; i++ ) { index[i] = (_iter+1)*_blocksize + i; }
00827
00828
00829
00830 RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
00831
00832
00833
00834 MVT::MvAddMv( one, VecIn, zero, VecIn, *F_vec );
00835
00836
00837
00838 int n_row = _hessmatrix.numRows();
00839
00840 for ( k=0; k<_blocksize; k++ ) {
00841 for ( i=0; i<n_row ; i++ ) {
00842 _hessmatrix(i, _iter*_blocksize+k) = zero;
00843 }
00844 }
00845
00846 RefCountPtr<const MV> Q_vec;
00847 RefCountPtr<MV> q_vec, tptr;
00848 tptr = MVT::Clone( *F_vec, IntOne );
00849
00850
00851
00852
00853 bool flg = false;
00854
00855 for (int num_prev = (_iter+1)*_blocksize; num_prev < (_iter+2)*_blocksize; num_prev++) {
00856
00857
00858
00859 dense_vec.size(num_prev);
00860
00861
00862
00863 index2[0] = num_prev;
00864 q_vec = MVT::CloneView( *_basisvecs, index2 );
00865
00866
00867
00868 index.resize( num_prev );
00869 for (k=0; k<num_prev; k++) { index[k] = k; }
00870 Q_vec = MVT::CloneView( *_basisvecs, index );
00871
00872
00873
00874
00875 bool dep = false;
00876 MVT::MvNorm( *q_vec, &nm1 );
00877
00878
00879
00880 MVT::MvTransMv( one, *Q_vec, *q_vec, dense_vec );
00881
00882
00883
00884
00885 for (k=0; k<num_prev; k++){
00886 _hessmatrix(k,num_prev-_blocksize) += dense_vec(k);
00887 }
00888
00889
00890 MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *q_vec );
00891
00892 MVT::MvNorm( *q_vec, &nm2 );
00893
00894 if (nm2[0] < nm1[0] * _dep_tol) {
00895
00896
00897
00898
00899
00900 MVT::MvTransMv( one, *Q_vec, *q_vec, dense_vec );
00901
00902
00903
00904
00905 for (k=0; k<num_prev; k++){
00906 _hessmatrix(k,num_prev-_blocksize) += dense_vec(k);
00907 }
00908
00909
00910 MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *q_vec );
00911
00912 MVT::MvNorm( *q_vec, &nm2 );
00913 }
00914
00915
00916
00917 if (nm2[0] < nm1[0] * _sing_tol) {
00918 dep = true;
00919 }
00920 if (!dep){
00921
00922
00923
00924 ScalarType rjj = one/nm2[0];
00925 MVT::MvAddMv( rjj, *q_vec, zero, *q_vec, *q_vec );
00926
00927
00928
00929
00930 _hessmatrix( num_prev, num_prev-_blocksize ) = nm2[0];
00931 }
00932 else {
00933
00934 if (_om->isVerbosityAndPrint( OrthoDetails )) {
00935 *_os << "Column " << num_prev << " of _basisvecs is dependent" << endl;
00936 *_os << endl;
00937 }
00938
00939
00940
00941
00942
00943
00944 MVT::MvRandom( *tptr );
00945 MVT::MvNorm( *tptr, &nm1 );
00946
00947
00948
00949
00950
00951 for (num_orth=0; num_orth<2; num_orth++)
00952 {
00953 MVT::MvTransMv(one, *Q_vec, *tptr, dense_vec);
00954
00955
00956
00957 MVT::MvTimesMatAddMv( -one, *Q_vec, dense_vec, one, *tptr );
00958 }
00959
00960 MVT::MvNorm( *tptr, &nm2 );
00961
00962 if (nm2[0] >= nm1[0] * _sing_tol){
00963
00964
00965
00966 MVT::MvAddMv( one, *tptr, zero, *tptr, *q_vec );
00967 MVT::MvNorm( *q_vec, &nm2 );
00968
00969
00970
00971 ScalarType rjj = one/nm2[0];
00972 MVT::MvAddMv( rjj, *q_vec, zero, *q_vec, *q_vec );
00973
00974
00975
00976
00977 _hessmatrix( num_prev, num_prev-_blocksize ) = zero;
00978 }
00979 else {
00980
00981
00982
00983 flg = true;
00984 return flg;
00985 }
00986
00987 }
00988
00989 }
00990
00991 if (_om->isVerbosity( OrthoDetails )){
00992 if(_om->doPrint()) { *_os << endl;
00993 *_os << "Checking Orthogonality after BlkOrthSing()"
00994 << " Iteration: " << _iter << endl; }
00995 CheckKrylovOrth(_iter);
00996 }
00997
00998 return flg;
00999
01000 }
01001
01002 template<class ScalarType, class MV, class OP>
01003 bool BlockGmres<ScalarType,MV,OP>::QRFactorAug(MV& VecIn,
01004 Teuchos::SerialDenseMatrix<int,ScalarType>& R,
01005 bool blkone)
01006 {
01007 int i,j,k;
01008 int nb = MVT::GetNumberVecs( VecIn );
01009 const int IntOne = 1;
01010 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01011 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
01012 bool addvec = false;
01013 bool flg = false;
01014
01015 std::vector<int> index, index2( IntOne );
01016 std::vector<MagnitudeType> norm1( IntOne );
01017 std::vector<MagnitudeType> norm2( IntOne );
01018 Teuchos::SerialDenseVector<int,ScalarType> rj;
01019 RefCountPtr<const MV> Qj;
01020 RefCountPtr<MV> qj, tptr;
01021 tptr = MVT::Clone(*_basisvecs, IntOne);
01022
01023
01024
01025 for ( j=0; j<nb; j++ ) {
01026 for ( i=0; i<nb; i++ ) {
01027 R(i,j) = zero;
01028 }
01029 }
01030
01031
01032
01033 for ( j=0; j<nb; j++ ) {
01034
01035 flg = false;
01036
01037
01038
01039
01040 index2[0] = j;
01041 qj = MVT::CloneView( VecIn, index2 );
01042
01043
01044
01045
01046 if ( j ) {
01047
01048
01049
01050
01051 rj.size(j);
01052 index.resize(j);
01053 for (i=0; i<j; i++) { index[i] = i; }
01054 Qj = MVT::CloneView( VecIn, index );
01055 MVT::MvNorm( *qj, &norm1 );
01056
01057
01058
01059
01060
01061
01062
01063
01064 MVT::MvTransMv( one, *Qj, *qj, rj );
01065
01066
01067
01068 for ( k=0; k<j; k++ ) {
01069 R(k,j) += rj(k);
01070 }
01071
01072
01073
01074 MVT::MvTimesMatAddMv( -one, *Qj, rj, one, *qj );
01075
01076 MVT::MvNorm( *qj, &norm2 );
01077
01078 if (norm2[0] < norm1[0] * _dep_tol){
01079
01080
01081
01082 MVT::MvTransMv( one, *Qj, *qj, rj );
01083
01084
01085
01086 for ( k=0; k<j; k++ ) {
01087 R(k,j) += rj(k);
01088 }
01089
01090
01091
01092 MVT::MvTimesMatAddMv( -one, *Qj, rj, one, *qj );
01093
01094 MVT::MvNorm( *qj, &norm2 );
01095 }
01096
01097
01098
01099 if (!blkone) {
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109 if (norm2[0] < norm1[0] * _blk_tol) {
01110 if (_om->isVerbosityAndPrint( OrthoDetails )) {
01111 *_os << "Column " << j << " of current block is dependent" << endl;
01112 }
01113 flg = true;
01114 return flg;
01115 }
01116 }
01117 else {
01118
01119
01120
01121
01122
01123
01124 if (norm2[0] < norm1[0] * _sing_tol) {
01125
01126
01127
01128
01129 addvec = true;
01130 Teuchos::SerialDenseVector<int,ScalarType> tj(j);
01131
01132 MVT::MvRandom( *tptr );
01133 MVT::MvNorm( *tptr, &norm1 );
01134
01135 int num_orth;
01136 for (num_orth=0; num_orth<2; num_orth++){
01137 MVT::MvTransMv( one, *Qj, *tptr, tj );
01138 MVT::MvTimesMatAddMv( -one, *Qj, tj, one, *tptr );
01139 }
01140 MVT::MvNorm( *tptr, &norm2 );
01141
01142 if (norm2[0] >= norm1[0] * _sing_tol){
01143
01144 MVT::MvAddMv( one, *tptr, zero, *tptr, *qj );
01145 }
01146 else {
01147 flg = true;
01148 return flg;
01149 }
01150 }
01151 }
01152 }
01153
01154
01155
01156
01157 std::vector<MagnitudeType> normq(IntOne);
01158 MVT::MvNorm( *qj, &normq );
01159
01160 ScalarType rjj = one / normq[0];
01161 MVT::MvAddMv ( rjj, *qj, zero, *qj, *qj );
01162
01163 if (addvec){
01164
01165
01166 R(j,j) = zero;
01167 }
01168 else {
01169 R(j,j) = normq[0];
01170 }
01171
01172 }
01173
01174 return flg;
01175
01176 }
01177
01178 template<class ScalarType, class MV, class OP>
01179 void
01180 BlockGmres<ScalarType,MV,OP>::UpdateLSQR( Teuchos::SerialDenseMatrix<int,ScalarType>& R,
01181 Teuchos::SerialDenseMatrix<int,ScalarType>& z )
01182 {
01183 int i, j, maxidx;
01184 ScalarType sigma, mu, vscale, maxelem;
01185 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01186
01187 Teuchos::LAPACK<int, ScalarType> lapack;
01188 Teuchos::BLAS<int, ScalarType> blas;
01189
01190
01191
01192
01193
01194 if (_blocksize == 1)
01195 {
01196 if (_iter==0) {
01197 cs.resize( _length+1 );
01198 sn.resize( _length+1 );
01199 }
01200
01201
01202
01203 for (i=0; i<_iter; i++) {
01204
01205
01206
01207 blas.ROT( 1, &R(i,_iter), 1, &R(i+1, _iter), 1, &cs[i], &sn[i] );
01208 }
01209
01210
01211
01212 blas.ROTG( &R(_iter,_iter), &R(_iter+1,_iter), &cs[_iter], &sn[_iter] );
01213 R(_iter+1,_iter) = zero;
01214
01215
01216
01217 blas.ROT( 1, &z(_iter,0), 1, &z(_iter+1,0), 1, &cs[_iter], &sn[_iter] );
01218 }
01219 else
01220 {
01221 if (_iter==0) {
01222 beta.size((_length+1)*_blocksize);
01223 }
01224
01225
01226
01227 for (j=0; j<_blocksize; j++) {
01228
01229
01230
01231 for (i=0; i<_iter*_blocksize+j; i++) {
01232 sigma = blas.DOT( _blocksize, &R(i+1,i), 1, &R(i+1,_iter*_blocksize+j), 1);
01233 sigma += R(i,_iter*_blocksize+j);
01234 sigma *= beta[i];
01235 blas.AXPY(_blocksize, -sigma, &R(i+1,i), 1, &R(i+1,_iter*_blocksize+j), 1);
01236 R(i,_iter*_blocksize+j) -= sigma;
01237 }
01238
01239
01240
01241 maxidx = blas.IAMAX( _blocksize+1, &R(_iter*_blocksize+j,_iter*_blocksize+j), 1 );
01242 maxelem = R(_iter*_blocksize+j+maxidx-1,_iter*_blocksize+j);
01243 for (i=0; i<_blocksize+1; i++)
01244 R(_iter*_blocksize+j+i,_iter*_blocksize+j) /= maxelem;
01245 sigma = blas.DOT( _blocksize, &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 1,
01246 &R(_iter*_blocksize+j+1,_iter*_blocksize+j), 1 );
01247 if (sigma == zero) {
01248 beta[_iter*_blocksize + j] = zero;
01249 } else {
01250 mu = sqrt(R(_iter*_blocksize+j,_iter*_blocksize+j)*R(_iter*_blocksize+j,_iter*_blocksize+j)+sigma);
01251 if ( Teuchos::ScalarTraits<ScalarType>::real(R(_iter*_blocksize+j,_iter*_blocksize+j)) < Teuchos::ScalarTraits<MagnitudeType>::zero() ) {
01252 vscale = R(_iter*_blocksize+j,_iter*_blocksize+j) - mu;
01253 } else {
01254 vscale = -sigma / (R(_iter*_blocksize+j,_iter*_blocksize+j) + mu);
01255 }
01256 beta[_iter*_blocksize+j] = 2.0*vscale*vscale/(sigma + vscale*vscale);
01257 R(_iter*_blocksize+j,_iter*_blocksize+j) = maxelem*mu;
01258 for (i=0; i<_blocksize; i++)
01259 R(_iter*_blocksize+j+1+i,_iter*_blocksize+j) /= vscale;
01260 }
01261
01262
01263
01264 for (i=0; i<_blocksize; i++) {
01265 sigma = blas.DOT( _blocksize, &R(_iter*_blocksize+j+1,_iter*_blocksize+j),
01266 1, &z(_iter*_blocksize+j+1,i), 1);
01267 sigma += z(_iter*_blocksize+j,i);
01268 sigma *= beta[_iter*_blocksize+j];
01269 blas.AXPY(_blocksize, -sigma, &R(_iter*_blocksize+j+1,_iter*_blocksize+j),
01270 1, &z(_iter*_blocksize+j+1,i), 1);
01271 z(_iter*_blocksize+j,i) -= sigma;
01272 }
01273 }
01274 }
01275 }
01276
01277
01278 template<class ScalarType, class MV, class OP>
01279 void BlockGmres<ScalarType,MV,OP>::CheckKrylovOrth( const int j )
01280 {
01281 int i,k,m=(j+1)*_blocksize;
01282 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
01283 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
01284 Teuchos::SerialDenseMatrix<int,ScalarType> VTV; VTV.shape(m,m);
01285 std::vector<int> index(_blocksize);
01286 std::vector<MagnitudeType> ptr_norms(_blocksize);
01287
01288 for ( i=0; i<_blocksize; i++ ) {
01289 index[i] = m+i;
01290 }
01291 RefCountPtr<MV> F_vec = MVT::CloneView( *_basisvecs, index );
01292
01293 ScalarType sum = zero;
01294 MVT::MvNorm( *F_vec, &ptr_norms );
01295 for ( i=0; i<_blocksize; i++ ) {
01296 sum += ptr_norms[i];
01297 }
01298
01299 index.resize( m );
01300 for ( i=0; i<m; i++ ) { index[i] = i; }
01301 RefCountPtr<MV> Vj = MVT::CloneView( *_basisvecs, index );
01302 MVT::MvTransMv(one, *Vj, *Vj, VTV);
01303 ScalarType column_sum;
01304
01305 *_os << " " << endl;
01306 *_os << "********Block Arnoldi iteration******** " << j << endl;
01307 *_os << " " << endl;
01308
01309 for (k=0; k<m; k++) {
01310 column_sum = zero;
01311 for (i=0; i<m; i++) {
01312 if (i==k) {
01313 VTV(i,i) -= one;
01314 }
01315 column_sum += VTV(i,k);
01316 }
01317 *_os << " V^T*V-I " << "for column " << k << " is "
01318 << Teuchos::ScalarTraits<ScalarType>::magnitude(column_sum) << endl;
01319 }
01320 *_os << " " << endl;
01321
01322 Teuchos::SerialDenseMatrix<int,ScalarType> E; E.shape(m,_blocksize);
01323
01324 MVT::MvTransMv(one, *Vj, *F_vec, E);
01325
01326 for (k=0;k<_blocksize;k++) {
01327 column_sum = zero;
01328 for (i=0; i<m; i++) {
01329 column_sum += E(i,k);
01330 }
01331 if (ptr_norms[k]) column_sum = column_sum/ptr_norms[k];
01332 *_os << " Orthogonality with F " << "for column " << k << " is "
01333 << Teuchos::ScalarTraits<ScalarType>::magnitude(column_sum) << endl;
01334 }
01335 *_os << " " << endl;
01336
01337
01338 }
01339
01340
01341
01342
01343 template<class ScalarType, class MV, class OP>
01344 std::string BlockGmres<ScalarType,MV,OP>::description() const
01345 {
01346 std::ostringstream oss;
01347 oss << "Belos::BlockGmres<...,"<<Teuchos::ScalarTraits<ScalarType>::name()<<">";
01348 oss << "{";
01349 oss << "Variant=\'"<<(_flexible?"Flexible":"Standard")<<"\'";
01350 oss << "}";
01351 return oss.str();
01352 }
01353
01354 }
01355
01356 #endif
01357
01358
01359