00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef BELOS_BLOCK_FGMRES_ITER_HPP
00030 #define BELOS_BLOCK_FGMRES_ITER_HPP
00031
00036 #include "BelosConfigDefs.hpp"
00037 #include "BelosTypes.hpp"
00038 #include "BelosGmresIteration.hpp"
00039
00040 #include "BelosLinearProblem.hpp"
00041 #include "BelosMatOrthoManager.hpp"
00042 #include "BelosOutputManager.hpp"
00043 #include "BelosStatusTest.hpp"
00044 #include "BelosOperatorTraits.hpp"
00045 #include "BelosMultiVecTraits.hpp"
00046
00047 #include "Teuchos_BLAS.hpp"
00048 #include "Teuchos_LAPACK.hpp"
00049 #include "Teuchos_SerialDenseMatrix.hpp"
00050 #include "Teuchos_SerialDenseVector.hpp"
00051 #include "Teuchos_ScalarTraits.hpp"
00052 #include "Teuchos_ParameterList.hpp"
00053 #include "Teuchos_TimeMonitor.hpp"
00054
00066 namespace Belos {
00067
00068 template<class ScalarType, class MV, class OP>
00069 class BlockFGmresIter : virtual public GmresIteration<ScalarType,MV,OP> {
00070
00071 public:
00072
00073
00074
00075
00076 typedef MultiVecTraits<ScalarType,MV> MVT;
00077 typedef OperatorTraits<ScalarType,MV,OP> OPT;
00078 typedef Teuchos::ScalarTraits<ScalarType> SCT;
00079 typedef typename SCT::magnitudeType MagnitudeType;
00080
00082
00083
00093 BlockFGmresIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
00094 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00095 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00096 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00097 Teuchos::ParameterList ¶ms );
00098
00100 virtual ~BlockFGmresIter() {};
00102
00103
00105
00106
00128 void iterate();
00129
00151 void initializeGmres(GmresIterationState<ScalarType,MV> newstate);
00152
00156 void initialize()
00157 {
00158 GmresIterationState<ScalarType,MV> empty;
00159 initializeGmres(empty);
00160 }
00161
00169 GmresIterationState<ScalarType,MV> getState() const {
00170 GmresIterationState<ScalarType,MV> state;
00171 state.curDim = curDim_;
00172 state.V = V_;
00173 state.Z = Z_;
00174 state.H = H_;
00175 state.R = R_;
00176 state.z = z_;
00177 return state;
00178 }
00179
00181
00182
00184
00185
00187 int getNumIters() const { return iter_; }
00188
00190 void resetNumIters( int iter = 0 ) { iter_ = iter; }
00191
00194 Teuchos::RCP<const MV> getNativeResiduals( std::vector<MagnitudeType> *norms ) const;
00195
00197
00202 Teuchos::RCP<MV> getCurrentUpdate() const;
00203
00205
00208 void updateLSQR( int dim = -1 );
00209
00211 int getCurSubspaceDim() const {
00212 if (!initialized_) return 0;
00213 return curDim_;
00214 };
00215
00217 int getMaxSubspaceDim() const { return blockSize_*numBlocks_; }
00218
00220
00221
00223
00224
00226 const LinearProblem<ScalarType,MV,OP>& getProblem() const { return *lp_; }
00227
00229 int getBlockSize() const { return blockSize_; }
00230
00232 void setBlockSize(int blockSize) { setSize( blockSize, numBlocks_ ); }
00233
00235 int getNumBlocks() const { return numBlocks_; }
00236
00238 void setNumBlocks(int numBlocks) { setSize( blockSize_, numBlocks ); }
00239
00246 void setSize(int blockSize, int numBlocks);
00247
00249 bool isInitialized() { return initialized_; }
00250
00252
00253 private:
00254
00255
00256
00257
00259 void setStateSize();
00260
00261
00262
00263
00264 const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > lp_;
00265 const Teuchos::RCP<OutputManager<ScalarType> > om_;
00266 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > stest_;
00267 const Teuchos::RCP<OrthoManager<ScalarType,MV> > ortho_;
00268
00269
00270
00271
00272
00273
00274 int blockSize_;
00275
00276 int numBlocks_;
00277
00278
00279 Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
00280 Teuchos::SerialDenseVector<int,MagnitudeType> cs;
00281
00282
00283
00284
00285
00286
00287
00288 bool initialized_;
00289
00290
00291
00292
00293 bool stateStorageInitialized_;
00294
00295
00296 int curDim_, iter_;
00297
00298
00299
00300
00301 Teuchos::RCP<MV> V_;
00302 Teuchos::RCP<MV> Z_;
00303
00304
00305
00306
00307 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > H_;
00308
00309
00310
00311
00312 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > R_;
00313 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > z_;
00314 };
00315
00317
00318 template<class ScalarType, class MV, class OP>
00319 BlockFGmresIter<ScalarType,MV,OP>::BlockFGmresIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
00320 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00321 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00322 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00323 Teuchos::ParameterList ¶ms ):
00324 lp_(problem),
00325 om_(printer),
00326 stest_(tester),
00327 ortho_(ortho),
00328 blockSize_(0),
00329 numBlocks_(0),
00330 initialized_(false),
00331 stateStorageInitialized_(false),
00332 curDim_(0),
00333 iter_(0)
00334 {
00335
00336 TEST_FOR_EXCEPTION(!params.isParameter("Num Blocks"), std::invalid_argument,
00337 "Belos::BlockFGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
00338 int nb = Teuchos::getParameter<int>(params, "Num Blocks");
00339
00340
00341 int bs = params.get("Block Size", 1);
00342 setSize( bs, nb );
00343 }
00344
00346
00347 template <class ScalarType, class MV, class OP>
00348 void BlockFGmresIter<ScalarType,MV,OP>::setSize (int blockSize, int numBlocks)
00349 {
00350
00351
00352
00353 TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument, "Belos::BlockFGmresIter::setSize was passed a non-positive argument.");
00354 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
00355
00356 return;
00357 }
00358
00359 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
00360 stateStorageInitialized_ = false;
00361
00362 blockSize_ = blockSize;
00363 numBlocks_ = numBlocks;
00364
00365 initialized_ = false;
00366 curDim_ = 0;
00367
00368
00369 setStateSize();
00370
00371 }
00372
00374
00375 template <class ScalarType, class MV, class OP>
00376 void BlockFGmresIter<ScalarType,MV,OP>::setStateSize ()
00377 {
00378 if (!stateStorageInitialized_) {
00379
00380
00381 Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
00382 Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
00383 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
00384 stateStorageInitialized_ = false;
00385 return;
00386 }
00387 else {
00388
00390
00391
00392 int newsd = blockSize_*(numBlocks_+1);
00393
00394 if (blockSize_==1) {
00395 cs.resize( newsd );
00396 sn.resize( newsd );
00397 }
00398 else {
00399 beta.resize( newsd );
00400 }
00401
00402
00403 TEST_FOR_EXCEPTION(blockSize_*numBlocks_ > MVT::GetVecLength(*rhsMV),std::invalid_argument,
00404 "Belos::BlockFGmresIter::setStateSize(): Cannot generate a Krylov basis with dimension larger the operator!");
00405
00406
00407 if (V_ == Teuchos::null) {
00408
00409 Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
00410 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
00411 "Belos::BlockFGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
00412 V_ = MVT::Clone( *tmp, newsd );
00413 }
00414 else {
00415
00416 if (MVT::GetNumberVecs(*V_) < newsd) {
00417 Teuchos::RCP<const MV> tmp = V_;
00418 V_ = MVT::Clone( *tmp, newsd );
00419 }
00420 }
00421
00422 if (Z_ == Teuchos::null) {
00423
00424 Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
00425 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
00426 "Belos::BlockFGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
00427 Z_ = MVT::Clone( *tmp, newsd );
00428 }
00429 else {
00430
00431 if (MVT::GetNumberVecs(*Z_) < newsd) {
00432 Teuchos::RCP<const MV> tmp = Z_;
00433 Z_ = MVT::Clone( *tmp, newsd );
00434 }
00435 }
00436
00437
00438 if (H_ == Teuchos::null)
00439 H_ = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>( newsd, newsd-blockSize_ ) );
00440 else
00441 H_->shapeUninitialized( newsd, newsd-blockSize_ );
00442
00443
00444
00445
00446 if (z_ == Teuchos::null)
00447 z_ = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(newsd,blockSize_) );
00448 else
00449 z_->shapeUninitialized( newsd, blockSize_ );
00450
00451
00452 stateStorageInitialized_ = true;
00453 }
00454 }
00455 }
00456
00458
00459 template <class ScalarType, class MV, class OP>
00460 Teuchos::RCP<MV> BlockFGmresIter<ScalarType,MV,OP>::getCurrentUpdate() const
00461 {
00462
00463
00464
00465
00466 RCP<MV> currentUpdate = Teuchos::null;
00467 if (curDim_==0) {
00468 return currentUpdate;
00469 } else {
00470 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00471 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00472 Teuchos::BLAS<int,ScalarType> blas;
00473 currentUpdate = MVT::Clone( *Z_, blockSize_ );
00474
00475
00476
00477 Teuchos::SerialDenseMatrix<int,ScalarType> y( Teuchos::Copy, *z_, curDim_, blockSize_ );
00478
00479
00480
00481 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
00482 Teuchos::NON_UNIT_DIAG, curDim_, blockSize_, one,
00483 H_->values(), H_->stride(), y.values(), y.stride() );
00484
00485
00486
00487 std::vector<int> index(curDim_);
00488 for ( int i=0; i<curDim_; i++ ) {
00489 index[i] = i;
00490 }
00491 RCP<const MV> Zjp1 = MVT::CloneView( *Z_, index );
00492 MVT::MvTimesMatAddMv( one, *Zjp1, y, zero, *currentUpdate );
00493 }
00494 return currentUpdate;
00495 }
00496
00497
00499
00500
00501 template <class ScalarType, class MV, class OP>
00502 Teuchos::RCP<const MV> BlockFGmresIter<ScalarType,MV,OP>::getNativeResiduals( std::vector<MagnitudeType> *norms ) const
00503 {
00504
00505
00506
00507 if ( norms && (int)norms->size() < blockSize_ )
00508 norms->resize( blockSize_ );
00509
00510 if (norms) {
00511 Teuchos::BLAS<int,ScalarType> blas;
00512 for (int j=0; j<blockSize_; j++) {
00513 (*norms)[j] = blas.NRM2( blockSize_, &(*z_)(curDim_, j), 1);
00514 }
00515 }
00516 return Teuchos::null;
00517 }
00518
00519
00520
00522
00523 template <class ScalarType, class MV, class OP>
00524 void BlockFGmresIter<ScalarType,MV,OP>::initializeGmres(GmresIterationState<ScalarType,MV> newstate)
00525 {
00526
00527 if (!stateStorageInitialized_)
00528 setStateSize();
00529
00530 TEST_FOR_EXCEPTION(!stateStorageInitialized_,std::invalid_argument,
00531 "Belos::BlockFGmresIter::initialize(): Cannot initialize state storage!");
00532
00533
00534
00535
00536
00537 std::string errstr("Belos::BlockFGmresIter::initialize(): Specified multivectors must have a consistent length and width.");
00538
00539 if (newstate.V != Teuchos::null && newstate.z != Teuchos::null) {
00540
00541
00542
00543 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.V) != MVT::GetVecLength(*V_),
00544 std::invalid_argument, errstr );
00545 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.V) < blockSize_,
00546 std::invalid_argument, errstr );
00547 TEST_FOR_EXCEPTION( newstate.curDim > blockSize_*(numBlocks_+1),
00548 std::invalid_argument, errstr );
00549
00550 curDim_ = newstate.curDim;
00551 int lclDim = MVT::GetNumberVecs(*newstate.V);
00552
00553
00554 TEST_FOR_EXCEPTION(newstate.z->numRows() < curDim_ || newstate.z->numCols() < blockSize_, std::invalid_argument, errstr);
00555
00556
00557
00558 if (newstate.V != V_) {
00559
00560 if (curDim_ == 0 && lclDim > blockSize_) {
00561 om_->stream(Warnings) << "Belos::BlockFGmresIter::initialize(): the solver was initialized with a kernel of " << lclDim << std::endl
00562 << "The block size however is only " << blockSize_ << std::endl
00563 << "The last " << lclDim - blockSize_ << " vectors will be discarded." << std::endl;
00564 }
00565 std::vector<int> nevind(curDim_+blockSize_);
00566 for (int i=0; i<curDim_+blockSize_; i++) nevind[i] = i;
00567 Teuchos::RCP<const MV> newV = MVT::CloneView( *newstate.V, nevind );
00568 Teuchos::RCP<MV> lclV = MVT::CloneView( *V_, nevind );
00569 MVT::MvAddMv( 1.0, *newV, 0.0, *newV, *lclV );
00570
00571
00572 lclV = Teuchos::null;
00573 }
00574
00575
00576 if (newstate.z != z_) {
00577 z_->putScalar();
00578 Teuchos::SerialDenseMatrix<int,ScalarType> newZ(Teuchos::View,*newstate.z,curDim_+blockSize_,blockSize_);
00579 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > lclz;
00580 lclz = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::View,*z_,curDim_+blockSize_,blockSize_) );
00581 lclz->assign(newZ);
00582
00583
00584 lclz = Teuchos::null;
00585 }
00586
00587 }
00588 else {
00589
00590 TEST_FOR_EXCEPTION(newstate.V == Teuchos::null,std::invalid_argument,
00591 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial kernel V_0.");
00592
00593 TEST_FOR_EXCEPTION(newstate.z == Teuchos::null,std::invalid_argument,
00594 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial norms z_0.");
00595 }
00596
00597
00598 initialized_ = true;
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611 }
00612
00613
00615
00616 template <class ScalarType, class MV, class OP>
00617 void BlockFGmresIter<ScalarType,MV,OP>::iterate()
00618 {
00619
00620
00621
00622 if (initialized_ == false) {
00623 initialize();
00624 }
00625
00626
00627 int searchDim = blockSize_*numBlocks_;
00628
00630
00631
00632
00633
00634 while (stest_->checkStatus(this) != Passed && curDim_+blockSize_ <= searchDim) {
00635
00636 iter_++;
00637
00638
00639 int lclDim = curDim_ + blockSize_;
00640
00641
00642 std::vector<int> curind(blockSize_);
00643 for (int i=0; i<blockSize_; i++) { curind[i] = lclDim + i; }
00644 Teuchos::RCP<MV> Vnext = MVT::CloneView(*V_,curind);
00645
00646
00647
00648 for (int i=0; i<blockSize_; i++) { curind[i] = curDim_ + i; }
00649 Teuchos::RCP<MV> Vprev = MVT::CloneView(*V_,curind);
00650 Teuchos::RCP<MV> Znext = MVT::CloneView(*Z_,curind);
00651
00652
00653 lp_->applyRightPrec(*Vprev,*Znext);
00654 Vprev = Teuchos::null;
00655
00656
00657 lp_->applyOp(*Znext,*Vnext);
00658 Znext = Teuchos::null;
00659
00660
00661
00662 std::vector<int> prevind(lclDim);
00663 for (int i=0; i<lclDim; i++) { prevind[i] = i; }
00664 Vprev = MVT::CloneView(*V_,prevind);
00665 Teuchos::Array<Teuchos::RCP<const MV> > AVprev(1, Vprev);
00666
00667
00668 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
00669 subH = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>
00670 ( Teuchos::View,*H_,lclDim,blockSize_,0,curDim_ ) );
00671 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > AsubH;
00672 AsubH.append( subH );
00673
00674
00675 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
00676 subR = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>
00677 ( Teuchos::View,*H_,blockSize_,blockSize_,lclDim,curDim_ ) );
00678 int rank = ortho_->projectAndNormalize(*Vnext,AsubH,subR,AVprev);
00679 TEST_FOR_EXCEPTION(rank != blockSize_,GmresIterationOrthoFailure,
00680 "Belos::BlockFGmresIter::iterate(): couldn't generate basis of full rank.");
00681
00682
00683
00684
00685
00686 updateLSQR();
00687
00688
00689
00690 Vnext = Teuchos::null;
00691 curDim_ += blockSize_;
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709 }
00710
00711 }
00712
00713
00714 template<class ScalarType, class MV, class OP>
00715 void BlockFGmresIter<ScalarType,MV,OP>::updateLSQR( int dim )
00716 {
00717 int i, j, maxidx;
00718 ScalarType sigma, mu, vscale, maxelem;
00719 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00720
00721
00722
00723
00724 int curDim = curDim_;
00725 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
00726 curDim = dim;
00727 }
00728
00729 Teuchos::LAPACK<int, ScalarType> lapack;
00730 Teuchos::BLAS<int, ScalarType> blas;
00731
00732
00733
00734
00735 if (blockSize_ == 1) {
00736
00737
00738
00739 for (i=0; i<curDim; i++) {
00740
00741
00742
00743 blas.ROT( 1, &(*H_)(i,curDim), 1, &(*H_)(i+1, curDim), 1, &cs[i], &sn[i] );
00744 }
00745
00746
00747
00748 blas.ROTG( &(*H_)(curDim,curDim), &(*H_)(curDim+1,curDim), &cs[curDim], &sn[curDim] );
00749 (*H_)(curDim+1,curDim) = zero;
00750
00751
00752
00753 blas.ROT( 1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim] );
00754 }
00755 else {
00756
00757
00758
00759 for (j=0; j<blockSize_; j++) {
00760
00761
00762
00763 for (i=0; i<curDim+j; i++) {
00764 sigma = blas.DOT( blockSize_, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
00765 sigma += (*H_)(i,curDim+j);
00766 sigma *= beta[i];
00767 blas.AXPY(blockSize_, -sigma, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
00768 (*H_)(i,curDim+j) -= sigma;
00769 }
00770
00771
00772
00773 maxidx = blas.IAMAX( blockSize_+1, &(*H_)(curDim+j,curDim+j), 1 );
00774 maxelem = (*H_)(curDim+j+maxidx-1,curDim+j);
00775 for (i=0; i<blockSize_+1; i++)
00776 (*H_)(curDim+j+i,curDim+j) /= maxelem;
00777 sigma = blas.DOT( blockSize_, &(*H_)(curDim+j+1,curDim+j), 1,
00778 &(*H_)(curDim+j+1,curDim+j), 1 );
00779 if (sigma == zero) {
00780 beta[curDim + j] = zero;
00781 } else {
00782 mu = std::sqrt((*H_)(curDim+j,curDim+j)*(*H_)(curDim+j,curDim+j)+sigma);
00783 if ( Teuchos::ScalarTraits<ScalarType>::real((*H_)(curDim+j,curDim+j))
00784 < Teuchos::ScalarTraits<MagnitudeType>::zero() ) {
00785 vscale = (*H_)(curDim+j,curDim+j) - mu;
00786 } else {
00787 vscale = -sigma / ((*H_)(curDim+j,curDim+j) + mu);
00788 }
00789 beta[curDim+j] = 2.0*vscale*vscale/(sigma + vscale*vscale);
00790 (*H_)(curDim+j,curDim+j) = maxelem*mu;
00791 for (i=0; i<blockSize_; i++)
00792 (*H_)(curDim+j+1+i,curDim+j) /= vscale;
00793 }
00794
00795
00796
00797 for (i=0; i<blockSize_; i++) {
00798 sigma = blas.DOT( blockSize_, &(*H_)(curDim+j+1,curDim+j),
00799 1, &(*z_)(curDim+j+1,i), 1);
00800 sigma += (*z_)(curDim+j,i);
00801 sigma *= beta[curDim+j];
00802 blas.AXPY(blockSize_, -sigma, &(*H_)(curDim+j+1,curDim+j),
00803 1, &(*z_)(curDim+j+1,i), 1);
00804 (*z_)(curDim+j,i) -= sigma;
00805 }
00806 }
00807 }
00808
00809
00810 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
00811 curDim_ = dim + blockSize_;
00812 }
00813
00814 }
00815
00816 }
00817
00818 #endif