00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef BELOS_BLOCK_FGMRES_ITER_HPP
00030 #define BELOS_BLOCK_FGMRES_ITER_HPP
00031
00036 #include "BelosConfigDefs.hpp"
00037 #include "BelosTypes.hpp"
00038 #include "BelosGmresIteration.hpp"
00039
00040 #include "BelosLinearProblem.hpp"
00041 #include "BelosMatOrthoManager.hpp"
00042 #include "BelosOutputManager.hpp"
00043 #include "BelosStatusTest.hpp"
00044 #include "BelosOperatorTraits.hpp"
00045 #include "BelosMultiVecTraits.hpp"
00046
00047 #include "Teuchos_BLAS.hpp"
00048 #include "Teuchos_SerialDenseMatrix.hpp"
00049 #include "Teuchos_SerialDenseVector.hpp"
00050 #include "Teuchos_ScalarTraits.hpp"
00051 #include "Teuchos_ParameterList.hpp"
00052 #include "Teuchos_TimeMonitor.hpp"
00053
00067 namespace Belos {
00068
00069 template<class ScalarType, class MV, class OP>
00070 class BlockFGmresIter : virtual public GmresIteration<ScalarType,MV,OP> {
00071
00072 public:
00073
00074
00075
00076
00077 typedef MultiVecTraits<ScalarType,MV> MVT;
00078 typedef OperatorTraits<ScalarType,MV,OP> OPT;
00079 typedef Teuchos::ScalarTraits<ScalarType> SCT;
00080 typedef typename SCT::magnitudeType MagnitudeType;
00081
00083
00084
00094 BlockFGmresIter( const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
00095 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00096 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00097 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00098 Teuchos::ParameterList ¶ms );
00099
00101 virtual ~BlockFGmresIter() {};
00103
00104
00106
00107
00129 void iterate();
00130
00152 void initializeGmres(GmresIterationState<ScalarType,MV> newstate);
00153
00157 void initialize()
00158 {
00159 GmresIterationState<ScalarType,MV> empty;
00160 initializeGmres(empty);
00161 }
00162
00170 GmresIterationState<ScalarType,MV> getState() const {
00171 GmresIterationState<ScalarType,MV> state;
00172 state.curDim = curDim_;
00173 state.V = V_;
00174 state.Z = Z_;
00175 state.H = H_;
00176 state.R = R_;
00177 state.z = z_;
00178 return state;
00179 }
00180
00182
00183
00185
00186
00188 int getNumIters() const { return iter_; }
00189
00191 void resetNumIters( int iter = 0 ) { iter_ = iter; }
00192
00195 Teuchos::RCP<const MV> getNativeResiduals( std::vector<MagnitudeType> *norms ) const;
00196
00198
00203 Teuchos::RCP<MV> getCurrentUpdate() const;
00204
00206
00209 void updateLSQR( int dim = -1 );
00210
00212 int getCurSubspaceDim() const {
00213 if (!initialized_) return 0;
00214 return curDim_;
00215 };
00216
00218 int getMaxSubspaceDim() const { return blockSize_*numBlocks_; }
00219
00221
00222
00224
00225
00227 const LinearProblem<ScalarType,MV,OP>& getProblem() const { return *lp_; }
00228
00230 int getBlockSize() const { return blockSize_; }
00231
00233 void setBlockSize(int blockSize) { setSize( blockSize, numBlocks_ ); }
00234
00236 int getNumBlocks() const { return numBlocks_; }
00237
00239 void setNumBlocks(int numBlocks) { setSize( blockSize_, numBlocks ); }
00240
00247 void setSize(int blockSize, int numBlocks);
00248
00250 bool isInitialized() { return initialized_; }
00251
00253
00254 private:
00255
00256
00257
00258
00260 void setStateSize();
00261
00262
00263
00264
00265 const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > lp_;
00266 const Teuchos::RCP<OutputManager<ScalarType> > om_;
00267 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > stest_;
00268 const Teuchos::RCP<OrthoManager<ScalarType,MV> > ortho_;
00269
00270
00271
00272
00273
00274
00275 int blockSize_;
00276
00277 int numBlocks_;
00278
00279
00280 Teuchos::SerialDenseVector<int,ScalarType> beta, sn;
00281 Teuchos::SerialDenseVector<int,MagnitudeType> cs;
00282
00283
00284
00285
00286
00287
00288
00289 bool initialized_;
00290
00291
00292
00293
00294 bool stateStorageInitialized_;
00295
00296
00297 int curDim_, iter_;
00298
00299
00300
00301
00302 Teuchos::RCP<MV> V_;
00303 Teuchos::RCP<MV> Z_;
00304
00305
00306
00307
00308 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > H_;
00309
00310
00311
00312
00313 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > R_;
00314 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > z_;
00315 };
00316
00318
00319 template<class ScalarType, class MV, class OP>
00320 BlockFGmresIter<ScalarType,MV,OP>::BlockFGmresIter(const Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > &problem,
00321 const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00322 const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00323 const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00324 Teuchos::ParameterList ¶ms ):
00325 lp_(problem),
00326 om_(printer),
00327 stest_(tester),
00328 ortho_(ortho),
00329 blockSize_(0),
00330 numBlocks_(0),
00331 initialized_(false),
00332 stateStorageInitialized_(false),
00333 curDim_(0),
00334 iter_(0)
00335 {
00336
00337 TEST_FOR_EXCEPTION(!params.isParameter("Num Blocks"), std::invalid_argument,
00338 "Belos::BlockFGmresIter::constructor: mandatory parameter 'Num Blocks' is not specified.");
00339 int nb = Teuchos::getParameter<int>(params, "Num Blocks");
00340
00341
00342 int bs = params.get("Block Size", 1);
00343 setSize( bs, nb );
00344 }
00345
00347
00348 template <class ScalarType, class MV, class OP>
00349 void BlockFGmresIter<ScalarType,MV,OP>::setSize (int blockSize, int numBlocks)
00350 {
00351
00352
00353
00354 TEST_FOR_EXCEPTION(numBlocks <= 0 || blockSize <= 0, std::invalid_argument, "Belos::BlockFGmresIter::setSize was passed a non-positive argument.");
00355 if (blockSize == blockSize_ && numBlocks == numBlocks_) {
00356
00357 return;
00358 }
00359
00360 if (blockSize!=blockSize_ || numBlocks!=numBlocks_)
00361 stateStorageInitialized_ = false;
00362
00363 blockSize_ = blockSize;
00364 numBlocks_ = numBlocks;
00365
00366 initialized_ = false;
00367 curDim_ = 0;
00368
00369
00370 setStateSize();
00371
00372 }
00373
00375
00376 template <class ScalarType, class MV, class OP>
00377 void BlockFGmresIter<ScalarType,MV,OP>::setStateSize ()
00378 {
00379 if (!stateStorageInitialized_) {
00380
00381
00382 Teuchos::RCP<const MV> lhsMV = lp_->getLHS();
00383 Teuchos::RCP<const MV> rhsMV = lp_->getRHS();
00384 if (lhsMV == Teuchos::null && rhsMV == Teuchos::null) {
00385 stateStorageInitialized_ = false;
00386 return;
00387 }
00388 else {
00389
00391
00392
00393 int newsd = blockSize_*(numBlocks_+1);
00394
00395 if (blockSize_==1) {
00396 cs.resize( newsd );
00397 sn.resize( newsd );
00398 }
00399 else {
00400 beta.resize( newsd );
00401 }
00402
00403
00404 TEST_FOR_EXCEPTION(blockSize_*numBlocks_ > MVT::GetVecLength(*rhsMV),std::invalid_argument,
00405 "Belos::BlockFGmresIter::setStateSize(): Cannot generate a Krylov basis with dimension larger the operator!");
00406
00407
00408 if (V_ == Teuchos::null) {
00409
00410 Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
00411 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
00412 "Belos::BlockFGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
00413 V_ = MVT::Clone( *tmp, newsd );
00414 }
00415 else {
00416
00417 if (MVT::GetNumberVecs(*V_) < newsd) {
00418 Teuchos::RCP<const MV> tmp = V_;
00419 V_ = MVT::Clone( *tmp, newsd );
00420 }
00421 }
00422
00423 if (Z_ == Teuchos::null) {
00424
00425 Teuchos::RCP<const MV> tmp = ( (rhsMV!=Teuchos::null)? rhsMV: lhsMV );
00426 TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::invalid_argument,
00427 "Belos::BlockFGmresIter::setStateSize(): linear problem does not specify multivectors to clone from.");
00428 Z_ = MVT::Clone( *tmp, newsd );
00429 }
00430 else {
00431
00432 if (MVT::GetNumberVecs(*Z_) < newsd) {
00433 Teuchos::RCP<const MV> tmp = Z_;
00434 Z_ = MVT::Clone( *tmp, newsd );
00435 }
00436 }
00437
00438
00439 if (H_ == Teuchos::null)
00440 H_ = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>( newsd, newsd-blockSize_ ) );
00441 else
00442 H_->shapeUninitialized( newsd, newsd-blockSize_ );
00443
00444
00445
00446
00447 if (z_ == Teuchos::null)
00448 z_ = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(newsd,blockSize_) );
00449 else
00450 z_->shapeUninitialized( newsd, blockSize_ );
00451
00452
00453 stateStorageInitialized_ = true;
00454 }
00455 }
00456 }
00457
00459
00460 template <class ScalarType, class MV, class OP>
00461 Teuchos::RCP<MV> BlockFGmresIter<ScalarType,MV,OP>::getCurrentUpdate() const
00462 {
00463
00464
00465
00466
00467 RCP<MV> currentUpdate = Teuchos::null;
00468 if (curDim_==0) {
00469 return currentUpdate;
00470 } else {
00471 const ScalarType one = Teuchos::ScalarTraits<ScalarType>::one();
00472 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00473 Teuchos::BLAS<int,ScalarType> blas;
00474 currentUpdate = MVT::Clone( *Z_, blockSize_ );
00475
00476
00477
00478 Teuchos::SerialDenseMatrix<int,ScalarType> y( Teuchos::Copy, *z_, curDim_, blockSize_ );
00479
00480
00481
00482 blas.TRSM( Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
00483 Teuchos::NON_UNIT_DIAG, curDim_, blockSize_, one,
00484 H_->values(), H_->stride(), y.values(), y.stride() );
00485
00486
00487
00488 std::vector<int> index(curDim_);
00489 for ( int i=0; i<curDim_; i++ ) {
00490 index[i] = i;
00491 }
00492 RCP<const MV> Zjp1 = MVT::CloneView( *Z_, index );
00493 MVT::MvTimesMatAddMv( one, *Zjp1, y, zero, *currentUpdate );
00494 }
00495 return currentUpdate;
00496 }
00497
00498
00500
00501
00502 template <class ScalarType, class MV, class OP>
00503 Teuchos::RCP<const MV> BlockFGmresIter<ScalarType,MV,OP>::getNativeResiduals( std::vector<MagnitudeType> *norms ) const
00504 {
00505
00506
00507
00508 if ( norms && (int)norms->size() < blockSize_ )
00509 norms->resize( blockSize_ );
00510
00511 if (norms) {
00512 Teuchos::BLAS<int,ScalarType> blas;
00513 for (int j=0; j<blockSize_; j++) {
00514 (*norms)[j] = blas.NRM2( blockSize_, &(*z_)(curDim_, j), 1);
00515 }
00516 }
00517 return Teuchos::null;
00518 }
00519
00520
00521
00523
00524 template <class ScalarType, class MV, class OP>
00525 void BlockFGmresIter<ScalarType,MV,OP>::initializeGmres(GmresIterationState<ScalarType,MV> newstate)
00526 {
00527
00528 if (!stateStorageInitialized_)
00529 setStateSize();
00530
00531 TEST_FOR_EXCEPTION(!stateStorageInitialized_,std::invalid_argument,
00532 "Belos::BlockFGmresIter::initialize(): Cannot initialize state storage!");
00533
00534
00535
00536
00537
00538 std::string errstr("Belos::BlockFGmresIter::initialize(): Specified multivectors must have a consistent length and width.");
00539
00540 if (newstate.V != Teuchos::null && newstate.z != Teuchos::null) {
00541
00542
00543
00544 TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.V) != MVT::GetVecLength(*V_),
00545 std::invalid_argument, errstr );
00546 TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.V) < blockSize_,
00547 std::invalid_argument, errstr );
00548 TEST_FOR_EXCEPTION( newstate.curDim > blockSize_*(numBlocks_+1),
00549 std::invalid_argument, errstr );
00550
00551 curDim_ = newstate.curDim;
00552 int lclDim = MVT::GetNumberVecs(*newstate.V);
00553
00554
00555 TEST_FOR_EXCEPTION(newstate.z->numRows() < curDim_ || newstate.z->numCols() < blockSize_, std::invalid_argument, errstr);
00556
00557
00558
00559 if (newstate.V != V_) {
00560
00561 if (curDim_ == 0 && lclDim > blockSize_) {
00562 om_->stream(Warnings) << "Belos::BlockFGmresIter::initialize(): the solver was initialized with a kernel of " << lclDim << std::endl
00563 << "The block size however is only " << blockSize_ << std::endl
00564 << "The last " << lclDim - blockSize_ << " vectors will be discarded." << std::endl;
00565 }
00566 std::vector<int> nevind(curDim_+blockSize_);
00567 for (int i=0; i<curDim_+blockSize_; i++) nevind[i] = i;
00568 Teuchos::RCP<const MV> newV = MVT::CloneView( *newstate.V, nevind );
00569 Teuchos::RCP<MV> lclV = MVT::CloneView( *V_, nevind );
00570 MVT::MvAddMv( 1.0, *newV, 0.0, *newV, *lclV );
00571
00572
00573 lclV = Teuchos::null;
00574 }
00575
00576
00577 if (newstate.z != z_) {
00578 z_->putScalar();
00579 Teuchos::SerialDenseMatrix<int,ScalarType> newZ(Teuchos::View,*newstate.z,curDim_+blockSize_,blockSize_);
00580 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > lclz;
00581 lclz = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::View,*z_,curDim_+blockSize_,blockSize_) );
00582 lclz->assign(newZ);
00583
00584
00585 lclz = Teuchos::null;
00586 }
00587
00588 }
00589 else {
00590
00591 TEST_FOR_EXCEPTION(newstate.V == Teuchos::null,std::invalid_argument,
00592 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial kernel V_0.");
00593
00594 TEST_FOR_EXCEPTION(newstate.z == Teuchos::null,std::invalid_argument,
00595 "Belos::BlockFGmresIter::initialize(): BlockFGmresStateIterState does not have initial norms z_0.");
00596 }
00597
00598
00599 initialized_ = true;
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612 }
00613
00614
00616
00617 template <class ScalarType, class MV, class OP>
00618 void BlockFGmresIter<ScalarType,MV,OP>::iterate()
00619 {
00620
00621
00622
00623 if (initialized_ == false) {
00624 initialize();
00625 }
00626
00627
00628 int searchDim = blockSize_*numBlocks_;
00629
00631
00632
00633
00634
00635 while (stest_->checkStatus(this) != Passed && curDim_+blockSize_ <= searchDim) {
00636
00637 iter_++;
00638
00639
00640 int lclDim = curDim_ + blockSize_;
00641
00642
00643 std::vector<int> curind(blockSize_);
00644 for (int i=0; i<blockSize_; i++) { curind[i] = lclDim + i; }
00645 Teuchos::RCP<MV> Vnext = MVT::CloneView(*V_,curind);
00646
00647
00648
00649 for (int i=0; i<blockSize_; i++) { curind[i] = curDim_ + i; }
00650 Teuchos::RCP<MV> Vprev = MVT::CloneView(*V_,curind);
00651 Teuchos::RCP<MV> Znext = MVT::CloneView(*Z_,curind);
00652
00653
00654 lp_->applyRightPrec(*Vprev,*Znext);
00655 Vprev = Teuchos::null;
00656
00657
00658 lp_->applyOp(*Znext,*Vnext);
00659 Znext = Teuchos::null;
00660
00661
00662
00663 std::vector<int> prevind(lclDim);
00664 for (int i=0; i<lclDim; i++) { prevind[i] = i; }
00665 Vprev = MVT::CloneView(*V_,prevind);
00666 Teuchos::Array<Teuchos::RCP<const MV> > AVprev(1, Vprev);
00667
00668
00669 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
00670 subH = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>
00671 ( Teuchos::View,*H_,lclDim,blockSize_,0,curDim_ ) );
00672 Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > AsubH;
00673 AsubH.append( subH );
00674
00675
00676 Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> >
00677 subR = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>
00678 ( Teuchos::View,*H_,blockSize_,blockSize_,lclDim,curDim_ ) );
00679 int rank = ortho_->projectAndNormalize(*Vnext,AsubH,subR,AVprev);
00680 TEST_FOR_EXCEPTION(rank != blockSize_,GmresIterationOrthoFailure,
00681 "Belos::BlockFGmresIter::iterate(): couldn't generate basis of full rank.");
00682
00683
00684
00685
00686
00687 updateLSQR();
00688
00689
00690
00691 Vnext = Teuchos::null;
00692 curDim_ += blockSize_;
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710 }
00711
00712 }
00713
00714
00715 template<class ScalarType, class MV, class OP>
00716 void BlockFGmresIter<ScalarType,MV,OP>::updateLSQR( int dim )
00717 {
00718 int i, j, maxidx;
00719 ScalarType sigma, mu, vscale, maxelem;
00720 const ScalarType zero = Teuchos::ScalarTraits<ScalarType>::zero();
00721
00722
00723
00724
00725 int curDim = curDim_;
00726 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
00727 curDim = dim;
00728 }
00729
00730 Teuchos::BLAS<int, ScalarType> blas;
00731
00732
00733
00734
00735 if (blockSize_ == 1) {
00736
00737
00738
00739 for (i=0; i<curDim; i++) {
00740
00741
00742
00743 blas.ROT( 1, &(*H_)(i,curDim), 1, &(*H_)(i+1, curDim), 1, &cs[i], &sn[i] );
00744 }
00745
00746
00747
00748 blas.ROTG( &(*H_)(curDim,curDim), &(*H_)(curDim+1,curDim), &cs[curDim], &sn[curDim] );
00749 (*H_)(curDim+1,curDim) = zero;
00750
00751
00752
00753 blas.ROT( 1, &(*z_)(curDim,0), 1, &(*z_)(curDim+1,0), 1, &cs[curDim], &sn[curDim] );
00754 }
00755 else {
00756
00757
00758
00759 for (j=0; j<blockSize_; j++) {
00760
00761
00762
00763 for (i=0; i<curDim+j; i++) {
00764 sigma = blas.DOT( blockSize_, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
00765 sigma += (*H_)(i,curDim+j);
00766 sigma *= beta[i];
00767 blas.AXPY(blockSize_, -sigma, &(*H_)(i+1,i), 1, &(*H_)(i+1,curDim+j), 1);
00768 (*H_)(i,curDim+j) -= sigma;
00769 }
00770
00771
00772
00773 maxidx = blas.IAMAX( blockSize_+1, &(*H_)(curDim+j,curDim+j), 1 );
00774 maxelem = (*H_)(curDim+j+maxidx-1,curDim+j);
00775 for (i=0; i<blockSize_+1; i++)
00776 (*H_)(curDim+j+i,curDim+j) /= maxelem;
00777 sigma = blas.DOT( blockSize_, &(*H_)(curDim+j+1,curDim+j), 1,
00778 &(*H_)(curDim+j+1,curDim+j), 1 );
00779 if (sigma == zero) {
00780 beta[curDim + j] = zero;
00781 } else {
00782 mu = std::sqrt((*H_)(curDim+j,curDim+j)*(*H_)(curDim+j,curDim+j)+sigma);
00783 if ( Teuchos::ScalarTraits<ScalarType>::real((*H_)(curDim+j,curDim+j))
00784 < Teuchos::ScalarTraits<MagnitudeType>::zero() ) {
00785 vscale = (*H_)(curDim+j,curDim+j) - mu;
00786 } else {
00787 vscale = -sigma / ((*H_)(curDim+j,curDim+j) + mu);
00788 }
00789 beta[curDim+j] = 2.0*vscale*vscale/(sigma + vscale*vscale);
00790 (*H_)(curDim+j,curDim+j) = maxelem*mu;
00791 for (i=0; i<blockSize_; i++)
00792 (*H_)(curDim+j+1+i,curDim+j) /= vscale;
00793 }
00794
00795
00796
00797 for (i=0; i<blockSize_; i++) {
00798 sigma = blas.DOT( blockSize_, &(*H_)(curDim+j+1,curDim+j),
00799 1, &(*z_)(curDim+j+1,i), 1);
00800 sigma += (*z_)(curDim+j,i);
00801 sigma *= beta[curDim+j];
00802 blas.AXPY(blockSize_, -sigma, &(*H_)(curDim+j+1,curDim+j),
00803 1, &(*z_)(curDim+j+1,i), 1);
00804 (*z_)(curDim+j,i) -= sigma;
00805 }
00806 }
00807 }
00808
00809
00810 if (dim >= curDim_ && dim < getMaxSubspaceDim()) {
00811 curDim_ = dim + blockSize_;
00812 }
00813
00814 }
00815
00816 }
00817
00818 #endif