AnasaziLOBPCG.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<ScalarType,MV,OP> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00414 
00416     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00417 
00418 
00427     void setBlockSize(int blockSize);
00428 
00429 
00431     int getBlockSize() const;
00432 
00433 
00445     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00446 
00448     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00449 
00451 
00453 
00454 
00460     void setFullOrtho(bool fullOrtho);
00461 
00463     bool getFullOrtho() const;
00464     
00466     bool hasP();
00467 
00469     
00471 
00472 
00474     void currentStatus(std::ostream &os);
00475 
00477 
00478   private:
00479     //
00480     //
00481     //
00482     void setupViews();
00483     //
00484     // Convenience typedefs
00485     //
00486     typedef SolverUtils<ScalarType,MV,OP> Utils;
00487     typedef MultiVecTraits<ScalarType,MV> MVT;
00488     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00489     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00490     typedef typename SCT::magnitudeType MagnitudeType;
00491     const MagnitudeType ONE;  
00492     const MagnitudeType ZERO; 
00493     const MagnitudeType NANVAL;
00494     //
00495     // Internal structs
00496     //
00497     struct CheckList {
00498       bool checkX, checkMX, checkKX;
00499       bool checkH, checkMH;
00500       bool checkP, checkMP, checkKP;
00501       bool checkR, checkQ;
00502       CheckList() : checkX(false),checkMX(false),checkKX(false),
00503                     checkH(false),checkMH(false),
00504                     checkP(false),checkMP(false),checkKP(false),
00505                     checkR(false),checkQ(false) {};
00506     };
00507     //
00508     // Internal methods
00509     //
00510     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00511     //
00512     // Classes inputed through constructor that define the eigenproblem to be solved.
00513     //
00514     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00515     const Teuchos::RCP<SortManager<ScalarType,MV,OP> >      sm_;
00516     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00517     const Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00518     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00519     //
00520     // Information obtained from the eigenproblem
00521     //
00522     Teuchos::RCP<const OP> Op_;
00523     Teuchos::RCP<const OP> MOp_;
00524     Teuchos::RCP<const OP> Prec_;
00525     bool hasM_;
00526     //
00527     // Internal timers
00528     //
00529     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00530                                 timerSort_, 
00531                                 timerLocalProj_, timerDS_,
00532                                 timerLocalUpdate_, timerCompRes_,
00533                                 timerOrtho_, timerInit_;
00534     //
00535     // Counters
00536     //
00537     // Number of operator applications
00538     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00539 
00540     //
00541     // Algorithmic parameters.
00542     //
00543     // blockSize_ is the solver block size
00544     int blockSize_;
00545     //
00546     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00547     // be activated (see citations at the top of this file)
00548     bool fullOrtho_;
00549 
00550     //
00551     // Current solver state
00552     //
00553     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00554     // is capable of running; _initialize is controlled  by the initialize() member method
00555     // For the implications of the state of initialized_, please see documentation for initialize()
00556     bool initialized_;
00557     //
00558     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00559     // this tells us how many of the values in theta_ are valid Ritz values
00560     int nevLocal_;
00561     //
00562     // hasP_ tells us whether there is valid data in P (and KP,MP)
00563     bool hasP_;
00564     //
00565     // State Multivecs
00566     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00567     // the rest are multivector views into V_, KV_ and MV_
00568     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00569     Teuchos::RCP<MV> X_, KX_, MX_,
00570                      H_, KH_, MH_,
00571                      P_, KP_, MP_;
00572 
00573     //
00574     // if fullOrtho_ == true, then we must produce the following on every iteration:
00575     // [newX newP] = [X H P] [CX;CP]
00576     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00577     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00578     // we must allocate additional storage for this.
00579     // otherwise, when not using full orthogonalization, the structure
00580     // [newX newP] = [X H P] [CX1  0 ]
00581     //                       [CX2 CP2]  allows us to work using only R as work space
00582     //                       [CX3 CP3] 
00583     Teuchos::RCP<MV> tmpmvec_;        
00584     // 
00585     // auxiliary vectors
00586     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00587     int numAuxVecs_;
00588     //
00589     // Number of iterations that have been performed.
00590     int iter_;
00591     // 
00592     // Current eigenvalues, residual norms
00593     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00594     // 
00595     // are the residual norms current with the residual?
00596     bool Rnorms_current_, R2norms_current_;
00597 
00598   };
00599 
00600 
00601 
00602 
00604   // Constructor
00605   template <class ScalarType, class MV, class OP>
00606   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00607         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00608         const Teuchos::RCP<SortManager<ScalarType,MV,OP> > &sorter,
00609         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00610         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00611         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00612         Teuchos::ParameterList &params
00613         ) :
00614     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00615     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00616     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00617     // problem, tools
00618     problem_(problem), 
00619     sm_(sorter),
00620     om_(printer),
00621     tester_(tester),
00622     orthman_(ortho),
00623     // timers, counters
00624     timerOp_(Teuchos::TimeMonitor::getNewTimer("Operation Op*x")),
00625     timerMOp_(Teuchos::TimeMonitor::getNewTimer("Operation M*x")),
00626     timerPrec_(Teuchos::TimeMonitor::getNewTimer("Operation Prec*x")),
00627     timerSort_(Teuchos::TimeMonitor::getNewTimer("Sorting eigenvalues")),
00628     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Local projection")),
00629     timerDS_(Teuchos::TimeMonitor::getNewTimer("Direct solve")),
00630     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Local update")),
00631     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Computing residuals")),
00632     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Orthogonalization")),
00633     timerInit_(Teuchos::TimeMonitor::getNewTimer("Initialization")),
00634     count_ApplyOp_(0),
00635     count_ApplyM_(0),
00636     count_ApplyPrec_(0),
00637     // internal data
00638     blockSize_(0),
00639     fullOrtho_(params.get("Full Ortho", true)),
00640     initialized_(false),
00641     nevLocal_(0),
00642     hasP_(false),
00643     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00644     numAuxVecs_(0),
00645     iter_(0),
00646     Rnorms_current_(false),
00647     R2norms_current_(false)
00648   {     
00649     TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00650                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00651     TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00652                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00653     TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00654                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00655     TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00656                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00657     TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00658                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00659     TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00660                        "Anasazi::LOBPCG::constructor: problem is not set.");
00661     TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00662                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00663 
00664     // get the problem operators
00665     Op_   = problem_->getOperator();
00666     TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00668     MOp_  = problem_->getM();
00669     Prec_ = problem_->getPrec();
00670     hasM_ = (MOp_ != Teuchos::null);
00671 
00672     // set the block size and allocate data
00673     int bs = params.get("Block Size", problem_->getNEV());
00674     setBlockSize(bs);
00675   }
00676 
00677 
00679   // Set the block size and make necessary adjustments.
00680   template <class ScalarType, class MV, class OP>
00681   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00682   {
00683     // time spent here counts towards timerInit_
00684     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00685 
00686     // This routine only allocates space; it doesn't not perform any computation
00687     // if size is decreased, take the first newBS vectors of all and leave state as is
00688     // otherwise, grow/allocate space and set solver to unitialized
00689 
00690     Teuchos::RCP<const MV> tmp;
00691     // grab some Multivector to Clone
00692     // in practice, getInitVec() should always provide this, but it is possible to use a 
00693     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00694     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00695     // than V_, and we don't want to keep V_ around longer than necessary
00696     if (blockSize_ > 0) {
00697       tmp = R_;
00698     }
00699     else {
00700       tmp = problem_->getInitVec();
00701       TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00702                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00703     }
00704     
00705     TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00706     if (newBS == blockSize_) {
00707       // do nothing
00708       return;
00709     }
00710     else if (newBS < blockSize_ && initialized_) {
00711       //
00712       // shrink vectors
00713 
00714       // release views so we can modify the bases
00715       X_ = Teuchos::null;
00716       KX_ = Teuchos::null;
00717       MX_ = Teuchos::null;
00718       H_ = Teuchos::null;
00719       KH_ = Teuchos::null;
00720       MH_ = Teuchos::null;
00721       P_ = Teuchos::null;
00722       KP_ = Teuchos::null;
00723       MP_ = Teuchos::null;
00724 
00725       // make new indices vectors
00726       std::vector<int> newind(newBS), oldind(newBS);
00727       for (int i=0; i<newBS; i++) {
00728         newind[i] = i;
00729         oldind[i] = i;
00730       }
00731 
00732       Teuchos::RCP<MV> newV, newMV, newKV, newR, src;
00733       // allocate R and newV
00734       newR = MVT::Clone(*tmp,newBS);
00735       newV = MVT::Clone(*tmp,newBS*3);
00736       newKV = MVT::Clone(*tmp,newBS*3);
00737       if (hasM_) {
00738         newMV = MVT::Clone(*tmp,newBS*3);
00739       }
00740 
00741       //
00742       // if we are initialized, we want to pull the data from V_ into newV:
00743       //           bs  |  bs  | bs 
00744       // newV  = [newX | **** |newP ]
00745       // newKV = [newKX| **** |newKP]
00746       // newMV = [newMX| **** |newMP]
00747       // where 
00748       //          oldbs   |  oldbs  |   oldbs   
00749       //  V_ = [newX  *** | ******* | newP  ***]
00750       // KV_ = [newKX *** | ******* | newKP ***]
00751       // MV_ = [newMX *** | ******* | newMP ***]
00752       //
00753       // we don't care to copy the data corresponding to H
00754       // we will not copy the M data if !hasM_, because it doesn't exist
00755       //
00756 
00757       // these are shrink operations which preserve their data
00758       theta_.resize(3*newBS);
00759       Rnorms_.resize(newBS);
00760       R2norms_.resize(newBS);
00761 
00762       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00763       src = MVT::CloneView(*R_,newind);
00764       MVT::SetBlock(*src,newind,*newR);
00765       // free old memory and point to new memory
00766       R_ = newR;
00767 
00768       // copy in order: newX newKX newMX, then newP newKP newMP
00769       // for  X: [0,bs-1] <-- [0,bs-1] 
00770       src = MVT::CloneView(*V_,oldind);
00771       MVT::SetBlock(*src,newind,*newV);
00772       src = MVT::CloneView(*KV_,oldind);
00773       MVT::SetBlock(*src,newind,*newKV);
00774       if (hasM_) {
00775         src = MVT::CloneView(*MV_,oldind);
00776         MVT::SetBlock(*src,newind,*newMV);
00777       }
00778       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00779       for (int i=0; i<newBS; i++) {
00780         newind[i] += 2*newBS;
00781         oldind[i] += 2*blockSize_;
00782       }
00783       src = MVT::CloneView(*V_,oldind);
00784       MVT::SetBlock(*src,newind,*newV);
00785       src = MVT::CloneView(*KV_,oldind);
00786       MVT::SetBlock(*src,newind,*newKV);
00787       if (hasM_) {
00788         src = MVT::CloneView(*MV_,oldind);
00789         MVT::SetBlock(*src,newind,*newMV);
00790       }
00791 
00792       // release temp view
00793       src = Teuchos::null;
00794 
00795       // release old allocations and point at new memory
00796       V_ = newV;
00797       KV_ = newKV;
00798       if (hasM_) {
00799         MV_ = newMV;
00800       }
00801       else {
00802         MV_ = V_;
00803       }
00804     }
00805     else {  
00806       // newBS > blockSize_  or  not initialized
00807       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00808       initialized_ = false;
00809       hasP_ = false;
00810 
00811       // release views
00812       X_ = Teuchos::null;
00813       KX_ = Teuchos::null;
00814       MX_ = Teuchos::null;
00815       H_ = Teuchos::null;
00816       KH_ = Teuchos::null;
00817       MH_ = Teuchos::null;
00818       P_ = Teuchos::null;
00819       KP_ = Teuchos::null;
00820       MP_ = Teuchos::null;
00821 
00822       // free allocated storage
00823       R_ = Teuchos::null;
00824       V_ = Teuchos::null;
00825 
00826       // allocate scalar vectors
00827       theta_.resize(3*newBS,NANVAL);
00828       Rnorms_.resize(newBS,NANVAL);
00829       R2norms_.resize(newBS,NANVAL);
00830       
00831       // clone multivectors off of tmp
00832       R_ = MVT::Clone(*tmp,newBS);
00833       V_ = MVT::Clone(*tmp,newBS*3);
00834       KV_ = MVT::Clone(*tmp,newBS*3);
00835       if (hasM_) {
00836         MV_ = MVT::Clone(*tmp,newBS*3);
00837       }
00838       else {
00839         MV_ = V_;
00840       }
00841     }
00842 
00843     // allocate tmp space
00844     tmpmvec_ = Teuchos::null;
00845     if (fullOrtho_) {
00846       tmpmvec_ = MVT::Clone(*tmp,newBS);
00847     }
00848 
00849     // set new block size
00850     blockSize_ = newBS;
00851 
00852     // setup new views
00853     setupViews();
00854   }
00855 
00856 
00858   // Setup views into V,KV,MV
00859   template <class ScalarType, class MV, class OP>
00860   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00861   {
00862     std::vector<int> ind(blockSize_);
00863 
00864     for (int i=0; i<blockSize_; i++) {
00865       ind[i] = i;
00866     }
00867     X_  = MVT::CloneView(*V_,ind);
00868     KX_ = MVT::CloneView(*KV_,ind);
00869     if (hasM_) {
00870       MX_ = MVT::CloneView(*MV_,ind);
00871     }
00872     else {
00873       MX_ = X_;
00874     }
00875 
00876     for (int i=0; i<blockSize_; i++) {
00877       ind[i] += blockSize_;
00878     }
00879     H_  = MVT::CloneView(*V_,ind);
00880     KH_ = MVT::CloneView(*KV_,ind);
00881     if (hasM_) {
00882       MH_ = MVT::CloneView(*MV_,ind);
00883     }
00884     else {
00885       MH_ = H_;
00886     }
00887 
00888     for (int i=0; i<blockSize_; i++) {
00889       ind[i] += blockSize_;
00890     }
00891     P_  = MVT::CloneView(*V_,ind);
00892     KP_ = MVT::CloneView(*KV_,ind);
00893     if (hasM_) {
00894       MP_ = MVT::CloneView(*MV_,ind);
00895     }
00896     else {
00897       MP_ = P_;
00898     }
00899   }
00900 
00901 
00903   // Set the auxiliary vectors
00904   template <class ScalarType, class MV, class OP>
00905   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00906     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00907 
00908     // set new auxiliary vectors
00909     auxVecs_ = auxvecs;
00910     
00911     numAuxVecs_ = 0;
00912     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00913       numAuxVecs_ += MVT::GetNumberVecs(**i);
00914     }
00915     
00916     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00917     if (numAuxVecs_ > 0 && initialized_) {
00918       initialized_ = false;
00919       hasP_ = false;
00920     }
00921 
00922     if (om_->isVerbosity( Debug ) ) {
00923       // Check almost everything here
00924       CheckList chk;
00925       chk.checkQ = true;
00926       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00927     }
00928   }
00929 
00930 
00932   /* Initialize the state of the solver
00933    * 
00934    * POST-CONDITIONS:
00935    *
00936    * initialized_ == true
00937    * X is orthonormal, orthogonal to auxVecs_
00938    * KX = Op*X
00939    * MX = M*X if hasM_
00940    * theta_ contains Ritz values of X
00941    * R = KX - MX*diag(theta_)
00942    * if hasP() == true,
00943    *   P orthogonal to auxVecs_
00944    *   if fullOrtho_ == true,
00945    *     P orthonormal and orthogonal to X
00946    *   KP = Op*P
00947    *   MP = M*P
00948    */
00949   template <class ScalarType, class MV, class OP>
00950   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00951   {
00952     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00953     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00954 
00955     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00956 
00957     std::vector<int> bsind(blockSize_);
00958     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00959 
00960     // in LOBPCG, X (the subspace iterate) is primary
00961     // the order of dependence follows like so.
00962     // --init->                 X
00963     //    --op apply->          MX,KX
00964     //       --ritz analysis->  theta
00965     //          --optional->    P,MP,KP
00966     // 
00967     // if the user specifies all data for a level, we will accept it.
00968     // otherwise, we will generate the whole level, and all subsequent levels.
00969     //
00970     // the data members are ordered based on dependence, and the levels are
00971     // partitioned according to the amount of work required to produce the
00972     // items in a level.
00973     //
00974     // inconsitent multivectors widths and lengths will not be tolerated, and
00975     // will be treated with exceptions.
00976 
00977     // set up X, KX, MX: get them from "state" if user specified them
00978 
00979     //----------------------------------------
00980     // set up X, MX, KX
00981     //----------------------------------------
00982     if (newstate.X != Teuchos::null) {
00983       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_),
00984                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00985       // newstate.X must have blockSize_ vectors; any more will be ignored
00986       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
00987                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
00988 
00989       // put X data in X_
00990       MVT::SetBlock(*newstate.X,bsind,*X_);
00991 
00992       // put MX data in MX_
00993       if (hasM_) {
00994         if (newstate.MX != Teuchos::null) {
00995           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_),
00996                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
00997           // newstate.MX must have blockSize_ vectors; any more will be ignored
00998           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
00999                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01000           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01001         }
01002         else {
01003           // user didn't specify MX, compute it
01004           {
01005             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01006             OPT::Apply(*MOp_,*X_,*MX_);
01007             count_ApplyM_ += blockSize_;
01008           }
01009           // we generated MX; we will generate R as well
01010           newstate.R = Teuchos::null;
01011         }
01012       }
01013   
01014       // put data in KX
01015       if (newstate.KX != Teuchos::null) {
01016         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_),
01017                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01018         // newstate.KX must have blockSize_ vectors; any more will be ignored
01019         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01020                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01021         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01022       }
01023       else {
01024         // user didn't specify KX, compute it
01025         {
01026           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01027           OPT::Apply(*Op_,*X_,*KX_);
01028           count_ApplyOp_ += blockSize_;
01029         }
01030         // we generated KX; we will generate R as well
01031         newstate.R = Teuchos::null;
01032       }
01033     }
01034     else {
01035       // user did not specify X
01036       // we will initialize X, compute KX and MX, and compute R
01037       //
01038       // clear state so we won't use any data from it below
01039       newstate.P  = Teuchos::null;
01040       newstate.KP = Teuchos::null;
01041       newstate.MP = Teuchos::null;
01042       newstate.R  = Teuchos::null;
01043       newstate.T  = Teuchos::null;
01044 
01045       // generate a basis and projectAndNormalize
01046       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01047       TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01048                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01049 
01050       int initSize = MVT::GetNumberVecs(*ivec);
01051       if (initSize > blockSize_) {
01052         // we need only the first blockSize_ vectors from ivec; get a view of them
01053         initSize = blockSize_;
01054         std::vector<int> ind(blockSize_);
01055         for (int i=0; i<blockSize_; i++) ind[i] = i;
01056         ivec = MVT::CloneView(*ivec,ind);
01057       }
01058 
01059       // assign ivec to first part of X_ 
01060       if (initSize > 0) {
01061         std::vector<int> ind(initSize);
01062         for (int i=0; i<initSize; i++) ind[i] = i;
01063         MVT::SetBlock(*ivec,ind,*X_);
01064       }
01065       // fill the rest of X_ with random
01066       if (blockSize_ > initSize) {
01067         std::vector<int> ind(blockSize_ - initSize);
01068         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01069         Teuchos::RCP<MV> rX = MVT::CloneView(*X_,ind);
01070         MVT::MvRandom(*rX);
01071         rX = Teuchos::null;
01072       }
01073 
01074       // put data in MX
01075       if (hasM_) {
01076         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01077         OPT::Apply(*MOp_,*X_,*MX_);
01078         count_ApplyM_ += blockSize_;
01079       }
01080   
01081       // remove auxVecs from X_ and normalize it
01082       if (numAuxVecs_ > 0) {
01083         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01084         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01085         int rank = orthman_->projectAndNormalizeMat(*X_,MX_,dummy,Teuchos::null,auxVecs_);
01086         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01087                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01088       }
01089       else {
01090         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01091         int rank = orthman_->normalizeMat(*X_,MX_,Teuchos::null);
01092         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01093                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01094       }
01095 
01096       // put data in KX
01097       {
01098         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01099         OPT::Apply(*Op_,*X_,*KX_);
01100         count_ApplyOp_ += blockSize_;
01101       }
01102     } // end if (newstate.X != Teuchos::null)
01103 
01104 
01105     //----------------------------------------
01106     // set up Ritz values
01107     //----------------------------------------
01108     theta_.resize(3*blockSize_,NANVAL);
01109     if (newstate.T != Teuchos::null) {
01110       TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01111                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01112       for (int i=0; i<blockSize_; i++) {
01113         theta_[i] = (*newstate.T)[i];
01114       }
01115     }
01116     else {
01117       // get ritz vecs/vals
01118       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01119                                                  MM(blockSize_,blockSize_),
01120                                                   S(blockSize_,blockSize_);
01121       {
01122         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01123         // project K
01124         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01125         // project M
01126         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01127         nevLocal_ = blockSize_;
01128       }
01129 
01130       // solve the projected problem
01131       {
01132         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01133         Utils::directSolver(blockSize_, KK, Teuchos::rcp(&MM,false), S, theta_, nevLocal_, 1);
01134         TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01135                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01136       }
01137 
01138       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01139       // However, we still require them to be ordered correctly
01140       {
01141         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01142 
01143         std::vector<int> order(blockSize_);
01144         // 
01145         // sort the first blockSize_ values in theta_
01146         sm_->sort( this, blockSize_, theta_, &order );   // don't catch exception
01147         //
01148         // apply the same ordering to the primitive ritz vectors
01149         Utils::permuteVectors(order,S);
01150       }
01151 
01152       // update the solution, use R for storage
01153       {
01154         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01155         // X <- X*S
01156         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01157         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01158         // KX <- KX*S
01159         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01160         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01161         if (hasM_) {
01162           // MX <- MX*S
01163           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01164           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01165         }
01166       }
01167     }
01168 
01169     //----------------------------------------
01170     // compute R
01171     //----------------------------------------
01172     if (newstate.R != Teuchos::null) {
01173       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_),
01174                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01175       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01176                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01177       MVT::SetBlock(*newstate.R,bsind,*R_);
01178     }
01179     else {
01180       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01181       // form R <- KX - MX*T
01182       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01183       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01184       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01185       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01186     }
01187 
01188     // R has been updated; mark the norms as out-of-date
01189     Rnorms_current_ = false;
01190     R2norms_current_ = false;
01191   
01192     // put data in P,KP,MP: P is not used to set theta
01193     if (newstate.P != Teuchos::null) {
01194       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01195                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01196       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_),
01197                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01198       hasP_ = true;
01199 
01200       // set P_
01201       MVT::SetBlock(*newstate.P,bsind,*P_);
01202 
01203       // set/compute KP_
01204       if (newstate.KP != Teuchos::null) {
01205         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01206                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01207         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_),
01208                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01209         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01210       }
01211       else {
01212         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01213         OPT::Apply(*Op_,*P_,*KP_);
01214         count_ApplyOp_ += blockSize_;
01215       }
01216 
01217       // set/compute MP_
01218       if (hasM_) {
01219         if (newstate.MP != Teuchos::null) {
01220           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01221                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01222           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_),
01223                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01224           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01225         }
01226         else {
01227           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01228           OPT::Apply(*MOp_,*P_,*MP_);
01229           count_ApplyM_ += blockSize_;
01230         }
01231       }
01232     }
01233     else {
01234       hasP_ = false;
01235     }
01236 
01237     // finally, we are initialized
01238     initialized_ = true;
01239 
01240     if (om_->isVerbosity( Debug ) ) {
01241       // Check almost everything here
01242       CheckList chk;
01243       chk.checkX = true;
01244       chk.checkKX = true;
01245       chk.checkMX = true;
01246       chk.checkP = true;
01247       chk.checkKP = true;
01248       chk.checkMP = true;
01249       chk.checkR = true;
01250       chk.checkQ = true;
01251       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01252     }
01253 
01254   }
01255 
01256   template <class ScalarType, class MV, class OP>
01257   void LOBPCG<ScalarType,MV,OP>::initialize()
01258   {
01259     LOBPCGState<ScalarType,MV> empty;
01260     initialize(empty);
01261   }
01262 
01263 
01265   // Instruct the solver to use full orthogonalization
01266   template <class ScalarType, class MV, class OP>
01267   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01268   {
01269     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01270       // state is already orthogonalized or solver is not initialized
01271       fullOrtho_ = fullOrtho;
01272     }
01273     else {
01274       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01275       // ergo, we must throw away data in P
01276       fullOrtho_ = true;
01277       hasP_ = false;
01278     }
01279 
01280     // the user has called setFullOrtho, so the class has been instantiated
01281     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01282     // if it is already allocated, it should be the proper size
01283     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01284       // allocated the workspace
01285       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01286     }
01287     else if (fullOrtho_==false) {
01288       // free the workspace
01289       tmpmvec_ = Teuchos::null;
01290     }
01291   }
01292 
01293 
01295   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01296   template <class ScalarType, class MV, class OP>
01297   void LOBPCG<ScalarType,MV,OP>::iterate () 
01298   {
01299     //
01300     // Allocate/initialize data structures
01301     //
01302     if (initialized_ == false) {
01303       initialize();
01304     }
01305 
01306     //
01307     // Miscellaneous definitions
01308     const int oneBlock    =   blockSize_;
01309     const int twoBlocks   = 2*blockSize_;
01310     const int threeBlocks = 3*blockSize_;
01311 
01312     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01313     for (int i=0; i<blockSize_; i++) {
01314       indblock1[i] = i;
01315       indblock2[i] = i +  blockSize_;
01316       indblock3[i] = i + 2*blockSize_;
01317     }
01318 
01319     //
01320     // Define dense projected/local matrices
01321     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01322     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01323     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01324     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01325                                                MM( threeBlocks, threeBlocks ),
01326                                                 S( threeBlocks, threeBlocks );
01327 
01328     while (tester_->checkStatus(this) != Passed) {
01329 
01330       // Print information on current status
01331       if (om_->isVerbosity(Debug)) {
01332         currentStatus( om_->stream(Debug) );
01333       }
01334       else if (om_->isVerbosity(IterationDetails)) {
01335         currentStatus( om_->stream(IterationDetails) );
01336       }
01337 
01338       // increment iteration counter
01339       iter_++;
01340 
01341       // Apply the preconditioner on the residuals: H <- Prec*R
01342       if (Prec_ != Teuchos::null) {
01343         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01344         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01345         count_ApplyPrec_ += blockSize_;
01346       }
01347       else {
01348         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01349       }
01350 
01351       // Apply the mass matrix on H
01352       if (hasM_) {
01353         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01354         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01355         count_ApplyM_ += blockSize_;
01356       }
01357 
01358       // orthogonalize H against the auxiliary vectors
01359       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01360       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01361       Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > C = 
01362         Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01363       Q = auxVecs_;
01364       if (fullOrtho_) {
01365         // X and P are not contiguous, so there is not much point in putting them under 
01366         // a single multivector view
01367         Q.push_back(X_);
01368         if (hasP_) {
01369           Q.push_back(P_);
01370         }
01371       }
01372       {
01373         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01374         int rank = orthman_->projectAndNormalizeMat(*H_,MH_,C,Teuchos::null,Q);
01375         // our views are currently in place; it is safe to throw an exception
01376         TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01377                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01378       }
01379 
01380       if (om_->isVerbosity( Debug ) ) {
01381         CheckList chk;
01382         chk.checkH = true;
01383         chk.checkMH = true;
01384         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01385       }
01386       else if (om_->isVerbosity( OrthoDetails ) ) {
01387         CheckList chk;
01388         chk.checkH = true;
01389         chk.checkMH = true;
01390         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01391       }
01392 
01393       // Apply the stiffness matrix to H
01394       {
01395         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01396         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01397         count_ApplyOp_ += blockSize_;
01398       }
01399 
01400       if (hasP_) {
01401         nevLocal_ = threeBlocks;
01402       }
01403       else {
01404         nevLocal_ = twoBlocks;
01405       }
01406 
01407       //
01408       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01409       // we need to perform the following operations:
01410       //    X' [KX KH KP]
01411       //    X' [MX MH MP]
01412       //    H' [KH KP]
01413       //    H' [MH MP]
01414       //    P' [KP]
01415       //    P' [MP]
01416       //    [X H P] CX
01417       //    { [X H P] CP    if  fullOrtho
01418       //    {   [H P] CP    if !fullOrtho
01419       //
01420       // since M[X H P] is potentially the same memory as [X H P], and 
01421       // because we are not allowed to have overlapping non-const views of 
01422       // a multivector, we will now abandon our non-const views in favor of 
01423       // const views
01424       //
01425       X_ = Teuchos::null;
01426       KX_ = Teuchos::null;
01427       MX_ = Teuchos::null;
01428       H_ = Teuchos::null;
01429       KH_ = Teuchos::null;
01430       MH_ = Teuchos::null;
01431       P_ = Teuchos::null;
01432       KP_ = Teuchos::null;
01433       MP_ = Teuchos::null;
01434       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01435       {
01436         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01437         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01438 
01439         std::vector<int> indXHP(nevLocal_);
01440         for (int i=0; i<nevLocal_; i++) {
01441           indXHP[i] = i;
01442         }
01443         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01444         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01445         if (hasM_) {
01446           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01447         }
01448         else {
01449           cM_XHP = cXHP;
01450         }
01451 
01452         std::vector<int> indHP(nevLocal_-blockSize_);
01453         for (int i=blockSize_; i<nevLocal_; i++) {
01454           indHP[i-blockSize_] = i;
01455         }
01456         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01457         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01458         if (hasM_) {
01459           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01460         }
01461         else {
01462           cM_HP = cHP;
01463         }
01464 
01465         if (nevLocal_ == threeBlocks) {
01466           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01467           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01468           if (hasM_) {
01469             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01470           }
01471           else {
01472             cM_P = cP;
01473           }
01474         }
01475       }
01476 
01477       //
01478       //----------------------------------------
01479       // Form "local" mass and stiffness matrices
01480       //----------------------------------------
01481       {
01482         // We will form only the block upper triangular part of 
01483         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01484         // Get the necessary views into KK and MM:
01485         //      [--K1--]        [--M1--]
01486         // KK = [  -K2-]   MM = [  -M2-]
01487         //      [    K3]        [    M3]
01488         // 
01489         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01490         //
01491         Teuchos::SerialDenseMatrix<int,ScalarType> 
01492           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01493           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01494           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01495           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01496           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01497           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01498         {
01499           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01500           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01501           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01502           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01503           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01504           if (nevLocal_ == threeBlocks) {
01505             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01506             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01507           }
01508         }
01509       }
01510       // below, we only need bases [X H P] and [H P] and friends
01511       // furthermore, we only need [H P] and friends if fullOrtho == false
01512       // clear the others now
01513       cX   = Teuchos::null;
01514       cH   = Teuchos::null;
01515       cP   = Teuchos::null;
01516       cK_P = Teuchos::null;
01517       cM_P = Teuchos::null;
01518       if (fullOrtho_ == true) {
01519         cHP = Teuchos::null;
01520         cK_HP = Teuchos::null;
01521         cM_HP = Teuchos::null;
01522       }
01523 
01524       //
01525       //---------------------------------------------------
01526       // Perform a spectral decomposition of (KK,MM)
01527       //---------------------------------------------------
01528       //
01529       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01530       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01531                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01532                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01533       {
01534         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01535         int localSize = nevLocal_;
01536         Utils::directSolver(localSize, lclKK, Teuchos::rcp(&lclMM,false), lclS, theta_, nevLocal_, 0);
01537         // localSize tells directSolver() how big KK,MM are
01538         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01539         // because of loss of MM-orthogonality in the projected eigenvectors
01540         // nevLocal_ tells us how much it used, telling us the effective localSize 
01541         // (i.e., how much of KK,MM used by directSolver)
01542         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01543         // detected by directSolver
01544         //
01545         if (nevLocal_ != localSize) {
01546           // before throwing the exception, and thereby leaving iterate(), setup the views again
01547           // first, clear the const views
01548           cXHP   = Teuchos::null;
01549           cK_XHP = Teuchos::null;
01550           cM_XHP = Teuchos::null;
01551           cHP    = Teuchos::null;
01552           cK_HP  = Teuchos::null;
01553           cM_HP  = Teuchos::null;
01554           setupViews();
01555         }
01556         TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01557             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01558       }
01559 
01560       //
01561       //---------------------------------------------------
01562       // Sort the ritz values using the sort manager
01563       //---------------------------------------------------
01564       Teuchos::LAPACK<int,ScalarType> lapack;
01565       Teuchos::BLAS<int,ScalarType> blas;
01566       {
01567         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01568 
01569         std::vector<int> order(nevLocal_);
01570         // 
01571         // Sort the first nevLocal_ values in theta_
01572         sm_->sort( this, nevLocal_, theta_, &order );   // don't catch exception
01573         //
01574         // Sort the primitive ritz vectors
01575         Utils::permuteVectors(order,lclS);
01576       }
01577 
01578       //
01579       //----------------------------------------
01580       // Compute coefficients for X and P under [X H P]
01581       //----------------------------------------
01582       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01583       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01584       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01585       // Here, we will also orthonormalize CX.
01586       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01587       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01588       if (fullOrtho_) {
01589         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01590         //                             ( S21 )                          ( S21 )
01591         //                             ( S31 )                          ( S31 )
01592         // Do this using Cholesky factorization of ( S11  0  )
01593         //                                         ( S21 S21 )
01594         //                                         ( S31 S31 )
01595         //           ( S11  0  )
01596         // Build C = ( S21 S21 )
01597         //           ( S31 S31 )
01598         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01599                                                 tmp1(nevLocal_,twoBlocks),
01600                                                 tmp2(twoBlocks  ,twoBlocks);
01601 
01602         // first block of rows: ( S11 0 )
01603         for (int j=0; j<oneBlock; j++) {
01604           for (int i=0; i<oneBlock; i++) {
01605             // CX
01606             C(i,j) = lclS(i,j);
01607             // CP
01608             C(i,j+oneBlock) = ZERO;
01609           }
01610         }
01611         // second block of rows: (S21 S21)
01612         for (int j=0; j<oneBlock; j++) {
01613           for (int i=oneBlock; i<twoBlocks; i++) {
01614             // CX
01615             C(i,j)          = lclS(i,j);
01616             // CP
01617             C(i,j+oneBlock) = lclS(i,j);
01618           }
01619         }
01620         // third block of rows
01621         if (nevLocal_ == threeBlocks) {
01622           for (int j=0; j<oneBlock; j++) {
01623             for (int i=twoBlocks; i<threeBlocks; i++) {
01624               // CX
01625               C(i,j)          = lclS(i,j);
01626               // CP
01627               C(i,j+oneBlock) = lclS(i,j);
01628             }
01629           }
01630         }
01631 
01632         // compute tmp1 = lclMM*C
01633         int teuchosret;
01634         teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01635         TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01636                            "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01637 
01638         // compute tmp2 = C^H*tmp1 == C^H*lclMM*C
01639         teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,tmp1,ZERO);
01640         TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01641                            "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01642 
01643         // compute R (cholesky) of tmp2
01644         int info;
01645         lapack.POTRF('U',twoBlocks,tmp2.values(),tmp2.stride(),&info);
01646         // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01647         if (info != 0) {
01648           cXHP = Teuchos::null;
01649           cHP = Teuchos::null;
01650           cK_XHP = Teuchos::null;
01651           cK_HP = Teuchos::null;
01652           cM_XHP = Teuchos::null;
01653           cM_HP = Teuchos::null;
01654           setupViews();
01655         }
01656         TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 
01657                            "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.");
01658         // compute C = C inv(R)
01659         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01660                   nevLocal_,twoBlocks,ONE,tmp2.values(),tmp2.stride(),C.values(),C.stride());
01661         // put C(:,0:oneBlock-1) into CX
01662         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01663         // put C(:,oneBlock:twoBlocks-1) into CP
01664         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01665 
01666         // check the results
01667         if (om_->isVerbosity( Debug ) ) {
01668           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01669                                                      tmp2(oneBlock,oneBlock);
01670           MagnitudeType tmp;
01671           int teuchosret;
01672           std::stringstream os;
01673           os.precision(2);
01674           os.setf(std::ios::scientific, std::ios::floatfield);
01675 
01676           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01677 
01678           // check CX^T MM CX == I
01679           // compute tmp1 = MM*CX
01680           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01681           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01682               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01683           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01684           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01685           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01686               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01687           // subtrace tmp2 - I == CX^H * MM * CX - I
01688           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01689           tmp = tmp2.normFrobenius();          
01690           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01691 
01692           // check CP^T MM CP == I
01693           // compute tmp1 = MM*CP
01694           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01695           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01696               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01697           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01698           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01699           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01700               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01701           // subtrace tmp2 - I == CP^H * MM * CP - I
01702           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01703           tmp = tmp2.normFrobenius();          
01704           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01705 
01706           // check CX^T MM CP == 0
01707           // compute tmp1 = MM*CP
01708           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01709           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01710           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01711           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01712           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01713           // subtrace tmp2 == CX^H * MM * CP
01714           tmp = tmp2.normFrobenius();          
01715           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01716 
01717           os << std::endl;
01718           om_->print(Debug,os.str());
01719         }
01720       }
01721       else {
01722         //     [S11 ... ...]
01723         // S = [S21 ... ...]
01724         //     [S31 ... ...]
01725         //
01726         // CX = [S11]
01727         //      [S21]
01728         //      [S31]   ->  X = [X H P] CX
01729         //      
01730         // CP = [S21]   ->  P =   [H P] CP
01731         //      [S31]
01732         //
01733         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01734         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01735       }
01736 
01737       //
01738       //----------------------------------------
01739       // Compute new X and new P
01740       //----------------------------------------
01741       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01742       {
01743         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01744 
01745         // if full ortho, then CX and CP are dense
01746         // we multiply [X H P]*CX into tmpMV
01747         //             [X H P]*CP into R
01748         // then put V(:,firstblock) <- tmpMV
01749         //          V(:,thirdblock) <- R
01750         //
01751         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01752         // of V, so that we can modify it before computing P
01753         // so we multiply [X H P]*CX into R
01754         //                V(:,firstblock) <- R
01755         //       multiply [H P]*CP into R
01756         //                V(:,thirdblock) <- R
01757         //
01758         // mutatis mutandis for K[XP] and M[XP]
01759         //
01760         // use SetBlock to do the assignments into V_
01761         //
01762         // in either case, views are only allowed to be overlapping
01763         // if they are const, and it should be assume that SetBlock
01764         // creates a view of the associated part
01765         //
01766         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01767         //
01768         if (fullOrtho_) {
01769           // X,P
01770           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01771           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01772           cXHP = Teuchos::null;
01773           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01774           MVT::SetBlock(*R_      ,indblock3,*V_);
01775           // KX,KP
01776           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01777           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01778           cK_XHP = Teuchos::null;
01779           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01780           MVT::SetBlock(*R_      ,indblock3,*KV_);
01781           // MX,MP
01782           if (hasM_) {
01783             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01784             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01785             cM_XHP = Teuchos::null;
01786             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01787             MVT::SetBlock(*R_      ,indblock3,*MV_);
01788           }
01789           else {
01790             cM_XHP = Teuchos::null;
01791           }
01792         }
01793         else {
01794           // X,P
01795           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01796           cXHP = Teuchos::null;
01797           MVT::SetBlock(*R_,indblock1,*V_);
01798           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01799           cHP = Teuchos::null;
01800           MVT::SetBlock(*R_,indblock3,*V_);
01801           // KX,KP
01802           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01803           cK_XHP = Teuchos::null;
01804           MVT::SetBlock(*R_,indblock1,*KV_);
01805           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01806           cK_HP = Teuchos::null;
01807           MVT::SetBlock(*R_,indblock3,*KV_);
01808           // MX,MP
01809           if (hasM_) {
01810             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01811             cM_XHP = Teuchos::null;
01812             MVT::SetBlock(*R_,indblock1,*MV_);
01813             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01814             cM_HP = Teuchos::null;
01815             MVT::SetBlock(*R_,indblock3,*MV_);
01816           }
01817           else {
01818             cM_XHP = Teuchos::null;
01819             cM_HP = Teuchos::null;
01820           }
01821         }
01822       } // end timing block
01823       // done with coefficient matrices
01824       CX = Teuchos::null;
01825       CP = Teuchos::null;
01826 
01827       //
01828       // we now have a P direction
01829       hasP_ = true;
01830 
01831       // debugging check: all of our const views should have been cleared by now
01832       // if not, we have a logic error above
01833       TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01834                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01835                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01836                           std::logic_error,
01837                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01838 
01839       //
01840       // recreate our const MV views of X,H,P and friends
01841       setupViews();
01842 
01843       //
01844       // Compute the new residuals, explicitly
01845       {
01846         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01847         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01848         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01849         for (int i = 0; i < blockSize_; i++) {
01850           T(i,i) = theta_[i];
01851         }
01852         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01853       }
01854 
01855       // R has been updated; mark the norms as out-of-date
01856       Rnorms_current_ = false;
01857       R2norms_current_ = false;
01858 
01859       // When required, monitor some orthogonalities
01860       if (om_->isVerbosity( Debug ) ) {
01861         // Check almost everything here
01862         CheckList chk;
01863         chk.checkX = true;
01864         chk.checkKX = true;
01865         chk.checkMX = true;
01866         chk.checkP = true;
01867         chk.checkKP = true;
01868         chk.checkMP = true;
01869         chk.checkR = true;
01870         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01871       }
01872       else if (om_->isVerbosity( OrthoDetails )) {
01873         CheckList chk;
01874         chk.checkX = true;
01875         chk.checkP = true;
01876         chk.checkR = true;
01877         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01878       }
01879     } // end while (statusTest == false)
01880   }
01881 
01882 
01884   // compute/return residual M-norms
01885   template <class ScalarType, class MV, class OP>
01886   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01887   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01888     if (Rnorms_current_ == false) {
01889       // Update the residual norms
01890       orthman_->norm(*R_,&Rnorms_);
01891       Rnorms_current_ = true;
01892     }
01893     return Rnorms_;
01894   }
01895 
01896   
01898   // compute/return residual 2-norms
01899   template <class ScalarType, class MV, class OP>
01900   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01901   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01902     if (R2norms_current_ == false) {
01903       // Update the residual 2-norms 
01904       MVT::MvNorm(*R_,&R2norms_);
01905       R2norms_current_ = true;
01906     }
01907     return R2norms_;
01908   }
01909 
01910 
01911 
01912 
01914   // Check accuracy, orthogonality, and other debugging stuff
01915   // 
01916   // bools specify which tests we want to run (instead of running more than we actually care about)
01917   //
01918   // we don't bother checking the following because they are computed explicitly:
01919   //    H == Prec*R
01920   //   KH == K*H
01921   //
01922   // 
01923   // checkX : X orthonormal
01924   //          orthogonal to auxvecs
01925   // checkMX: check MX == M*X
01926   // checkKX: check KX == K*X
01927   // checkP : if fullortho P orthonormal and orthogonal to X
01928   //          orthogonal to auxvecs
01929   // checkMP: check MP == M*P
01930   // checkKP: check KP == K*P
01931   // checkH : if fullortho H orthonormal and orthogonal to X and P
01932   //          orthogonal to auxvecs
01933   // checkMH: check MH == M*H
01934   // checkR : check R orthogonal to X
01935   // checkQ : check that auxiliary vectors are actually orthonormal
01936   //
01937   // TODO: 
01938   //  add checkTheta 
01939   //
01940   template <class ScalarType, class MV, class OP>
01941   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
01942   {
01943     using std::endl;
01944 
01945     std::stringstream os;
01946     os.precision(2);
01947     os.setf(std::ios::scientific, std::ios::floatfield);
01948     MagnitudeType tmp;
01949 
01950     os << " Debugging checks: iteration " << iter_ << where << endl;
01951 
01952     // X and friends
01953     if (chk.checkX && initialized_) {
01954       tmp = orthman_->orthonormError(*X_);
01955       os << " >> Error in X^H M X == I : " << tmp << endl;
01956       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01957         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
01958         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
01959       }
01960     }
01961     if (chk.checkMX && hasM_ && initialized_) {
01962       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
01963       os << " >> Error in MX == M*X    : " << tmp << endl;
01964     }
01965     if (chk.checkKX && initialized_) {
01966       tmp = Utils::errorEquality(*X_, *KX_, Op_);
01967       os << " >> Error in KX == K*X    : " << tmp << endl;
01968     }
01969 
01970     // P and friends
01971     if (chk.checkP && hasP_ && initialized_) {
01972       if (fullOrtho_) {
01973         tmp = orthman_->orthonormError(*P_);
01974         os << " >> Error in P^H M P == I : " << tmp << endl;
01975         tmp = orthman_->orthogError(*P_,*X_);
01976         os << " >> Error in P^H M X == 0 : " << tmp << endl;
01977       }
01978       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01979         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
01980         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
01981       }
01982     }
01983     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
01984       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
01985       os << " >> Error in MP == M*P    : " << tmp << endl;
01986     }
01987     if (chk.checkKP && hasP_ && initialized_) {
01988       tmp = Utils::errorEquality(*P_, *KP_, Op_);
01989       os << " >> Error in KP == K*P    : " << tmp << endl;
01990     }
01991 
01992     // H and friends
01993     if (chk.checkH && initialized_) {
01994       if (fullOrtho_) {
01995         tmp = orthman_->orthonormError(*H_);
01996         os << " >> Error in H^H M H == I : " << tmp << endl;
01997         tmp = orthman_->orthogError(*H_,*X_);
01998         os << " >> Error in H^H M X == 0 : " << tmp << endl;
01999         if (hasP_) {
02000           tmp = orthman_->orthogError(*H_,*P_);
02001           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02002         }
02003       }
02004       for (unsigned int i=0; i<auxVecs_.size(); i++) {
02005         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02006         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02007       }
02008     }
02009     if (chk.checkMH && hasM_ && initialized_) {
02010       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02011       os << " >> Error in MH == M*H    : " << tmp << endl;
02012     }
02013 
02014     // R: this is not M-orthogonality, but standard euclidean orthogonality
02015     if (chk.checkR && initialized_) {
02016       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02017       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02018       tmp = xTx.normFrobenius();
02019       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02020       double normR = xTx.normFrobenius();
02021       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02022     }
02023 
02024     // Q
02025     if (chk.checkQ) {
02026       for (unsigned int i=0; i<auxVecs_.size(); i++) {
02027         tmp = orthman_->orthonormError(*auxVecs_[i]);
02028         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02029         for (unsigned int j=i+1; j<auxVecs_.size(); j++) {
02030           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02031           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02032         }
02033       }
02034     }
02035 
02036     os << endl;
02037 
02038     return os.str();
02039   }
02040 
02041 
02043   // Print the current status of the solver
02044   template <class ScalarType, class MV, class OP>
02045   void 
02046   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02047   {
02048     using std::endl;
02049 
02050     os.setf(std::ios::scientific, std::ios::floatfield);  
02051     os.precision(6);
02052     os <<endl;
02053     os <<"================================================================================" << endl;
02054     os << endl;
02055     os <<"                              LOBPCG Solver Status" << endl;
02056     os << endl;
02057     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02058     os <<"The number of iterations performed is " << iter_       << endl;
02059     os <<"The current block size is             " << blockSize_  << endl;
02060     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02061     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02062     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02063     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02064 
02065     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02066 
02067     if (initialized_) {
02068       os << endl;
02069       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02070       os << std::setw(20) << "Eigenvalue" 
02071          << std::setw(20) << "Residual(M)"
02072          << std::setw(20) << "Residual(2)"
02073          << endl;
02074       os <<"--------------------------------------------------------------------------------"<<endl;
02075       for (int i=0; i<blockSize_; i++) {
02076         os << std::setw(20) << theta_[i];
02077         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02078         else os << std::setw(20) << "not current";
02079         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02080         else os << std::setw(20) << "not current";
02081         os << endl;
02082       }
02083     }
02084     os <<"================================================================================" << endl;
02085     os << endl;
02086   }
02087 
02089   // are we initialized or not?
02090   template <class ScalarType, class MV, class OP>
02091   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02092     return initialized_; 
02093   }
02094 
02095 
02097   // is P valid or not?
02098   template <class ScalarType, class MV, class OP>
02099   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02100     return hasP_;
02101   }
02102   
02104   // is full orthogonalization enabled or not?
02105   template <class ScalarType, class MV, class OP>
02106   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02107     return(fullOrtho_); 
02108   }
02109 
02110   
02112   // return the current auxilliary vectors
02113   template <class ScalarType, class MV, class OP>
02114   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02115     return auxVecs_;
02116   }
02117 
02119   // return the current block size
02120   template <class ScalarType, class MV, class OP>
02121   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02122     return(blockSize_); 
02123   }
02124 
02126   // return the current eigenproblem
02127   template <class ScalarType, class MV, class OP>
02128   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02129     return(*problem_); 
02130   }
02131 
02132   
02134   // return the max subspace dimension
02135   template <class ScalarType, class MV, class OP>
02136   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02137     return 3*blockSize_;
02138   }
02139 
02141   // return the current subspace dimension
02142   template <class ScalarType, class MV, class OP>
02143   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02144     if (!initialized_) return 0;
02145     return nevLocal_;
02146   }
02147 
02148   
02150   // return the current ritz residual norms
02151   template <class ScalarType, class MV, class OP>
02152   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02153   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02154   {
02155     return this->getRes2Norms();
02156   }
02157 
02158   
02160   // return the current compression indices
02161   template <class ScalarType, class MV, class OP>
02162   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02163     std::vector<int> ret(nevLocal_,0);
02164     return ret;
02165   }
02166 
02167   
02169   // return the current ritz values
02170   template <class ScalarType, class MV, class OP>
02171   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02172     std::vector<Value<ScalarType> > ret(nevLocal_);
02173     for (int i=0; i<nevLocal_; i++) {
02174       ret[i].realpart = theta_[i];
02175       ret[i].imagpart = ZERO;
02176     }
02177     return ret;
02178   }
02179 
02180   
02182   // return the current ritz vectors
02183   template <class ScalarType, class MV, class OP>
02184   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02185     return X_;
02186   }
02187 
02188   
02190   // reset the iteration counter
02191   template <class ScalarType, class MV, class OP>
02192   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02193     iter_=0; 
02194   }
02195 
02196   
02198   // return the number of iterations
02199   template <class ScalarType, class MV, class OP>
02200   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02201     return(iter_); 
02202   }
02203 
02204   
02206   // return the state
02207   template <class ScalarType, class MV, class OP>
02208   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02209     LOBPCGState<ScalarType,MV> state;
02210     state.V = V_;
02211     state.KV = KV_;
02212     state.X = X_;
02213     state.KX = KX_;
02214     state.P = P_;
02215     state.KP = KP_;
02216     state.H = H_;
02217     state.KH = KH_;
02218     state.R = R_;
02219     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02220     if (hasM_) {
02221       state.MV = MV_;
02222       state.MX = MX_;
02223       state.MP = MP_;
02224       state.MH = MH_;
02225     }
02226     else {
02227       state.MX = Teuchos::null;
02228       state.MP = Teuchos::null;
02229       state.MH = Teuchos::null;
02230     }
02231     return state;
02232   }
02233 
02234 } // end Anasazi namespace
02235 
02236 #endif // ANASAZI_LOBPCG_HPP

Generated on Tue Oct 20 12:45:18 2009 for Anasazi by doxygen 1.4.7