AnasaziLOBPCG.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00415     void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
00416 
00418     Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
00419 
00421     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00422 
00423 
00432     void setBlockSize(int blockSize);
00433 
00434 
00436     int getBlockSize() const;
00437 
00438 
00450     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00451 
00453     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00454 
00456 
00458 
00459 
00465     void setFullOrtho(bool fullOrtho);
00466 
00468     bool getFullOrtho() const;
00469     
00471     bool hasP();
00472 
00474     
00476 
00477 
00479     void currentStatus(std::ostream &os);
00480 
00482 
00483   private:
00484     //
00485     //
00486     //
00487     void setupViews();
00488     //
00489     // Convenience typedefs
00490     //
00491     typedef SolverUtils<ScalarType,MV,OP> Utils;
00492     typedef MultiVecTraits<ScalarType,MV> MVT;
00493     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00494     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00495     typedef typename SCT::magnitudeType MagnitudeType;
00496     const MagnitudeType ONE;  
00497     const MagnitudeType ZERO; 
00498     const MagnitudeType NANVAL;
00499     //
00500     // Internal structs
00501     //
00502     struct CheckList {
00503       bool checkX, checkMX, checkKX;
00504       bool checkH, checkMH;
00505       bool checkP, checkMP, checkKP;
00506       bool checkR, checkQ;
00507       CheckList() : checkX(false),checkMX(false),checkKX(false),
00508                     checkH(false),checkMH(false),
00509                     checkP(false),checkMP(false),checkKP(false),
00510                     checkR(false),checkQ(false) {};
00511     };
00512     //
00513     // Internal methods
00514     //
00515     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00516     //
00517     // Classes inputed through constructor that define the eigenproblem to be solved.
00518     //
00519     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00520     const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
00521     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00522     Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00523     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00524     //
00525     // Information obtained from the eigenproblem
00526     //
00527     Teuchos::RCP<const OP> Op_;
00528     Teuchos::RCP<const OP> MOp_;
00529     Teuchos::RCP<const OP> Prec_;
00530     bool hasM_;
00531     //
00532     // Internal timers
00533     //
00534     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00535                                 timerSort_, 
00536                                 timerLocalProj_, timerDS_,
00537                                 timerLocalUpdate_, timerCompRes_,
00538                                 timerOrtho_, timerInit_;
00539     //
00540     // Counters
00541     //
00542     // Number of operator applications
00543     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00544 
00545     //
00546     // Algorithmic parameters.
00547     //
00548     // blockSize_ is the solver block size
00549     int blockSize_;
00550     //
00551     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00552     // be activated (see citations at the top of this file)
00553     bool fullOrtho_;
00554 
00555     //
00556     // Current solver state
00557     //
00558     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00559     // is capable of running; _initialize is controlled  by the initialize() member method
00560     // For the implications of the state of initialized_, please see documentation for initialize()
00561     bool initialized_;
00562     //
00563     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00564     // this tells us how many of the values in theta_ are valid Ritz values
00565     int nevLocal_;
00566     //
00567     // hasP_ tells us whether there is valid data in P (and KP,MP)
00568     bool hasP_;
00569     //
00570     // State Multivecs
00571     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00572     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00573     // the rest are multivector views into V_, KV_ and MV_
00574     Teuchos::RCP<MV> X_, KX_, MX_,
00575                      H_, KH_, MH_,
00576                      P_, KP_, MP_;
00577 
00578     //
00579     // if fullOrtho_ == true, then we must produce the following on every iteration:
00580     // [newX newP] = [X H P] [CX;CP]
00581     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00582     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00583     // we must allocate additional storage for this.
00584     // otherwise, when not using full orthogonalization, the structure
00585     // [newX newP] = [X H P] [CX1  0 ]
00586     //                       [CX2 CP2]  allows us to work using only R as work space
00587     //                       [CX3 CP3] 
00588     Teuchos::RCP<MV> tmpmvec_;        
00589     // 
00590     // auxiliary vectors
00591     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00592     int numAuxVecs_;
00593     //
00594     // Number of iterations that have been performed.
00595     int iter_;
00596     // 
00597     // Current eigenvalues, residual norms
00598     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00599     // 
00600     // are the residual norms current with the residual?
00601     bool Rnorms_current_, R2norms_current_;
00602 
00603   };
00604 
00605 
00606 
00607 
00609   // Constructor
00610   template <class ScalarType, class MV, class OP>
00611   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00612         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00613         const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00614         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00615         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00616         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00617         Teuchos::ParameterList &params
00618         ) :
00619     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00620     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00621     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00622     // problem, tools
00623     problem_(problem), 
00624     sm_(sorter),
00625     om_(printer),
00626     tester_(tester),
00627     orthman_(ortho),
00628     // timers, counters
00629     timerOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Op*x")),
00630     timerMOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation M*x")),
00631     timerPrec_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Prec*x")),
00632     timerSort_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Sorting eigenvalues")),
00633     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local projection")),
00634     timerDS_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Direct solve")),
00635     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local update")),
00636     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Computing residuals")),
00637     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Orthogonalization")),
00638     timerInit_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Initialization")),
00639     count_ApplyOp_(0),
00640     count_ApplyM_(0),
00641     count_ApplyPrec_(0),
00642     // internal data
00643     blockSize_(0),
00644     fullOrtho_(params.get("Full Ortho", true)),
00645     initialized_(false),
00646     nevLocal_(0),
00647     hasP_(false),
00648     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00649     numAuxVecs_(0),
00650     iter_(0),
00651     Rnorms_current_(false),
00652     R2norms_current_(false)
00653   {     
00654     TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00655                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00656     TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00657                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00658     TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00659                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00660     TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00661                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00662     TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00663                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00664     TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00665                        "Anasazi::LOBPCG::constructor: problem is not set.");
00666     TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00668 
00669     // get the problem operators
00670     Op_   = problem_->getOperator();
00671     TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00672                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00673     MOp_  = problem_->getM();
00674     Prec_ = problem_->getPrec();
00675     hasM_ = (MOp_ != Teuchos::null);
00676 
00677     // set the block size and allocate data
00678     int bs = params.get("Block Size", problem_->getNEV());
00679     setBlockSize(bs);
00680   }
00681 
00682 
00684   // Set the block size and make necessary adjustments.
00685   template <class ScalarType, class MV, class OP>
00686   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00687   {
00688     // time spent here counts towards timerInit_
00689     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00690 
00691     // This routine only allocates space; it doesn't not perform any computation
00692     // if size is decreased, take the first newBS vectors of all and leave state as is
00693     // otherwise, grow/allocate space and set solver to unitialized
00694 
00695     Teuchos::RCP<const MV> tmp;
00696     // grab some Multivector to Clone
00697     // in practice, getInitVec() should always provide this, but it is possible to use a 
00698     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00699     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00700     // than V_, and we don't want to keep V_ around longer than necessary
00701     if (blockSize_ > 0) {
00702       tmp = R_;
00703     }
00704     else {
00705       tmp = problem_->getInitVec();
00706       TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00707                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00708     }
00709     
00710     TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00711     if (newBS == blockSize_) {
00712       // do nothing
00713       return;
00714     }
00715     else if (newBS < blockSize_ && initialized_) {
00716       //
00717       // shrink vectors
00718 
00719       // release views so we can modify the bases
00720       X_ = Teuchos::null;
00721       KX_ = Teuchos::null;
00722       MX_ = Teuchos::null;
00723       H_ = Teuchos::null;
00724       KH_ = Teuchos::null;
00725       MH_ = Teuchos::null;
00726       P_ = Teuchos::null;
00727       KP_ = Teuchos::null;
00728       MP_ = Teuchos::null;
00729 
00730       // make new indices vectors
00731       std::vector<int> newind(newBS), oldind(newBS);
00732       for (int i=0; i<newBS; i++) {
00733         newind[i] = i;
00734         oldind[i] = i;
00735       }
00736 
00737       Teuchos::RCP<MV> newV, newMV, newKV, newR;
00738       Teuchos::RCP<const MV> src;
00739       // allocate R and newV
00740       newR = MVT::Clone(*tmp,newBS);
00741       newV = MVT::Clone(*tmp,newBS*3);
00742       newKV = MVT::Clone(*tmp,newBS*3);
00743       if (hasM_) {
00744         newMV = MVT::Clone(*tmp,newBS*3);
00745       }
00746 
00747       //
00748       // if we are initialized, we want to pull the data from V_ into newV:
00749       //           bs  |  bs  | bs 
00750       // newV  = [newX | **** |newP ]
00751       // newKV = [newKX| **** |newKP]
00752       // newMV = [newMX| **** |newMP]
00753       // where 
00754       //          oldbs   |  oldbs  |   oldbs   
00755       //  V_ = [newX  *** | ******* | newP  ***]
00756       // KV_ = [newKX *** | ******* | newKP ***]
00757       // MV_ = [newMX *** | ******* | newMP ***]
00758       //
00759       // we don't care to copy the data corresponding to H
00760       // we will not copy the M data if !hasM_, because it doesn't exist
00761       //
00762 
00763       // these are shrink operations which preserve their data
00764       theta_.resize(3*newBS);
00765       Rnorms_.resize(newBS);
00766       R2norms_.resize(newBS);
00767 
00768       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00769       src = MVT::CloneView(*R_,newind);
00770       MVT::SetBlock(*src,newind,*newR);
00771       // free old memory and point to new memory
00772       R_ = newR;
00773 
00774       // copy in order: newX newKX newMX, then newP newKP newMP
00775       // for  X: [0,bs-1] <-- [0,bs-1] 
00776       src = MVT::CloneView(*V_,oldind);
00777       MVT::SetBlock(*src,newind,*newV);
00778       src = MVT::CloneView(*KV_,oldind);
00779       MVT::SetBlock(*src,newind,*newKV);
00780       if (hasM_) {
00781         src = MVT::CloneView(*MV_,oldind);
00782         MVT::SetBlock(*src,newind,*newMV);
00783       }
00784       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00785       for (int i=0; i<newBS; i++) {
00786         newind[i] += 2*newBS;
00787         oldind[i] += 2*blockSize_;
00788       }
00789       src = MVT::CloneView(*V_,oldind);
00790       MVT::SetBlock(*src,newind,*newV);
00791       src = MVT::CloneView(*KV_,oldind);
00792       MVT::SetBlock(*src,newind,*newKV);
00793       if (hasM_) {
00794         src = MVT::CloneView(*MV_,oldind);
00795         MVT::SetBlock(*src,newind,*newMV);
00796       }
00797 
00798       // release temp view
00799       src = Teuchos::null;
00800 
00801       // release old allocations and point at new memory
00802       V_ = newV;
00803       KV_ = newKV;
00804       if (hasM_) {
00805         MV_ = newMV;
00806       }
00807       else {
00808         MV_ = V_;
00809       }
00810     }
00811     else {  
00812       // newBS > blockSize_  or  not initialized
00813       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00814       initialized_ = false;
00815       hasP_ = false;
00816 
00817       // release views
00818       X_ = Teuchos::null;
00819       KX_ = Teuchos::null;
00820       MX_ = Teuchos::null;
00821       H_ = Teuchos::null;
00822       KH_ = Teuchos::null;
00823       MH_ = Teuchos::null;
00824       P_ = Teuchos::null;
00825       KP_ = Teuchos::null;
00826       MP_ = Teuchos::null;
00827 
00828       // free allocated storage
00829       R_ = Teuchos::null;
00830       V_ = Teuchos::null;
00831 
00832       // allocate scalar vectors
00833       theta_.resize(3*newBS,NANVAL);
00834       Rnorms_.resize(newBS,NANVAL);
00835       R2norms_.resize(newBS,NANVAL);
00836       
00837       // clone multivectors off of tmp
00838       R_ = MVT::Clone(*tmp,newBS);
00839       V_ = MVT::Clone(*tmp,newBS*3);
00840       KV_ = MVT::Clone(*tmp,newBS*3);
00841       if (hasM_) {
00842         MV_ = MVT::Clone(*tmp,newBS*3);
00843       }
00844       else {
00845         MV_ = V_;
00846       }
00847     }
00848 
00849     // allocate tmp space
00850     tmpmvec_ = Teuchos::null;
00851     if (fullOrtho_) {
00852       tmpmvec_ = MVT::Clone(*tmp,newBS);
00853     }
00854 
00855     // set new block size
00856     blockSize_ = newBS;
00857 
00858     // setup new views
00859     setupViews();
00860   }
00861 
00862 
00864   // Setup views into V,KV,MV
00865   template <class ScalarType, class MV, class OP>
00866   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00867   {
00868     std::vector<int> ind(blockSize_);
00869 
00870     for (int i=0; i<blockSize_; i++) {
00871       ind[i] = i;
00872     }
00873     X_  = MVT::CloneViewNonConst(*V_,ind);
00874     KX_ = MVT::CloneViewNonConst(*KV_,ind);
00875     if (hasM_) {
00876       MX_ = MVT::CloneViewNonConst(*MV_,ind);
00877     }
00878     else {
00879       MX_ = X_;
00880     }
00881 
00882     for (int i=0; i<blockSize_; i++) {
00883       ind[i] += blockSize_;
00884     }
00885     H_  = MVT::CloneViewNonConst(*V_,ind);
00886     KH_ = MVT::CloneViewNonConst(*KV_,ind);
00887     if (hasM_) {
00888       MH_ = MVT::CloneViewNonConst(*MV_,ind);
00889     }
00890     else {
00891       MH_ = H_;
00892     }
00893 
00894     for (int i=0; i<blockSize_; i++) {
00895       ind[i] += blockSize_;
00896     }
00897     P_  = MVT::CloneViewNonConst(*V_,ind);
00898     KP_ = MVT::CloneViewNonConst(*KV_,ind);
00899     if (hasM_) {
00900       MP_ = MVT::CloneViewNonConst(*MV_,ind);
00901     }
00902     else {
00903       MP_ = P_;
00904     }
00905   }
00906 
00907 
00909   // Set the auxiliary vectors
00910   template <class ScalarType, class MV, class OP>
00911   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00912     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00913 
00914     // set new auxiliary vectors
00915     auxVecs_ = auxvecs;
00916     
00917     numAuxVecs_ = 0;
00918     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00919       numAuxVecs_ += MVT::GetNumberVecs(**i);
00920     }
00921     
00922     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00923     if (numAuxVecs_ > 0 && initialized_) {
00924       initialized_ = false;
00925       hasP_ = false;
00926     }
00927 
00928     if (om_->isVerbosity( Debug ) ) {
00929       // Check almost everything here
00930       CheckList chk;
00931       chk.checkQ = true;
00932       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00933     }
00934   }
00935 
00936 
00938   /* Initialize the state of the solver
00939    * 
00940    * POST-CONDITIONS:
00941    *
00942    * initialized_ == true
00943    * X is orthonormal, orthogonal to auxVecs_
00944    * KX = Op*X
00945    * MX = M*X if hasM_
00946    * theta_ contains Ritz values of X
00947    * R = KX - MX*diag(theta_)
00948    * if hasP() == true,
00949    *   P orthogonal to auxVecs_
00950    *   if fullOrtho_ == true,
00951    *     P orthonormal and orthogonal to X
00952    *   KP = Op*P
00953    *   MP = M*P
00954    */
00955   template <class ScalarType, class MV, class OP>
00956   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00957   {
00958     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00959     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00960 
00961     Teuchos::TimeMonitor inittimer( *timerInit_ );
00962 
00963     std::vector<int> bsind(blockSize_);
00964     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00965 
00966     // in LOBPCG, X (the subspace iterate) is primary
00967     // the order of dependence follows like so.
00968     // --init->                 X
00969     //    --op apply->          MX,KX
00970     //       --ritz analysis->  theta
00971     //          --optional->    P,MP,KP
00972     // 
00973     // if the user specifies all data for a level, we will accept it.
00974     // otherwise, we will generate the whole level, and all subsequent levels.
00975     //
00976     // the data members are ordered based on dependence, and the levels are
00977     // partitioned according to the amount of work required to produce the
00978     // items in a level.
00979     //
00980     // inconsitent multivectors widths and lengths will not be tolerated, and
00981     // will be treated with exceptions.
00982 
00983     // set up X, KX, MX: get them from "state" if user specified them
00984 
00985     //----------------------------------------
00986     // set up X, MX, KX
00987     //----------------------------------------
00988     if (newstate.X != Teuchos::null) {
00989       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_),
00990                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00991       // newstate.X must have blockSize_ vectors; any more will be ignored
00992       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
00993                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
00994 
00995       // put X data in X_
00996       MVT::SetBlock(*newstate.X,bsind,*X_);
00997 
00998       // put MX data in MX_
00999       if (hasM_) {
01000         if (newstate.MX != Teuchos::null) {
01001           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_),
01002                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
01003           // newstate.MX must have blockSize_ vectors; any more will be ignored
01004           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
01005                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01006           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01007         }
01008         else {
01009           // user didn't specify MX, compute it
01010           {
01011             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01012             OPT::Apply(*MOp_,*X_,*MX_);
01013             count_ApplyM_ += blockSize_;
01014           }
01015           // we generated MX; we will generate R as well
01016           newstate.R = Teuchos::null;
01017         }
01018       }
01019   
01020       // put data in KX
01021       if (newstate.KX != Teuchos::null) {
01022         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_),
01023                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01024         // newstate.KX must have blockSize_ vectors; any more will be ignored
01025         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01026                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01027         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01028       }
01029       else {
01030         // user didn't specify KX, compute it
01031         {
01032           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01033           OPT::Apply(*Op_,*X_,*KX_);
01034           count_ApplyOp_ += blockSize_;
01035         }
01036         // we generated KX; we will generate R as well
01037         newstate.R = Teuchos::null;
01038       }
01039     }
01040     else {
01041       // user did not specify X
01042       // we will initialize X, compute KX and MX, and compute R
01043       //
01044       // clear state so we won't use any data from it below
01045       newstate.P  = Teuchos::null;
01046       newstate.KP = Teuchos::null;
01047       newstate.MP = Teuchos::null;
01048       newstate.R  = Teuchos::null;
01049       newstate.T  = Teuchos::null;
01050 
01051       // generate a basis and projectAndNormalize
01052       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01053       TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01054                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01055 
01056       int initSize = MVT::GetNumberVecs(*ivec);
01057       if (initSize > blockSize_) {
01058         // we need only the first blockSize_ vectors from ivec; get a view of them
01059         initSize = blockSize_;
01060         std::vector<int> ind(blockSize_);
01061         for (int i=0; i<blockSize_; i++) ind[i] = i;
01062         ivec = MVT::CloneView(*ivec,ind);
01063       }
01064 
01065       // assign ivec to first part of X_ 
01066       if (initSize > 0) {
01067         std::vector<int> ind(initSize);
01068         for (int i=0; i<initSize; i++) ind[i] = i;
01069         MVT::SetBlock(*ivec,ind,*X_);
01070       }
01071       // fill the rest of X_ with random
01072       if (blockSize_ > initSize) {
01073         std::vector<int> ind(blockSize_ - initSize);
01074         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01075         Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
01076         MVT::MvRandom(*rX);
01077         rX = Teuchos::null;
01078       }
01079 
01080       // put data in MX
01081       if (hasM_) {
01082         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01083         OPT::Apply(*MOp_,*X_,*MX_);
01084         count_ApplyM_ += blockSize_;
01085       }
01086   
01087       // remove auxVecs from X_ and normalize it
01088       if (numAuxVecs_ > 0) {
01089         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01090         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01091         int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
01092         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01093                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01094       }
01095       else {
01096         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01097         int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
01098         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01099                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01100       }
01101 
01102       // put data in KX
01103       {
01104         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01105         OPT::Apply(*Op_,*X_,*KX_);
01106         count_ApplyOp_ += blockSize_;
01107       }
01108     } // end if (newstate.X != Teuchos::null)
01109 
01110 
01111     //----------------------------------------
01112     // set up Ritz values
01113     //----------------------------------------
01114     theta_.resize(3*blockSize_,NANVAL);
01115     if (newstate.T != Teuchos::null) {
01116       TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01117                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01118       for (int i=0; i<blockSize_; i++) {
01119         theta_[i] = (*newstate.T)[i];
01120       }
01121     }
01122     else {
01123       // get ritz vecs/vals
01124       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01125                                                  MM(blockSize_,blockSize_),
01126                                                   S(blockSize_,blockSize_);
01127       {
01128         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01129         // project K
01130         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01131         // project M
01132         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01133         nevLocal_ = blockSize_;
01134       }
01135 
01136       // solve the projected problem
01137       {
01138         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01139         Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
01140         TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01141                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01142       }
01143 
01144       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01145       // However, we still require them to be ordered correctly
01146       {
01147         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01148 
01149         std::vector<int> order(blockSize_);
01150         // 
01151         // sort the first blockSize_ values in theta_
01152         sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_);   // don't catch exception
01153         //
01154         // apply the same ordering to the primitive ritz vectors
01155         Utils::permuteVectors(order,S);
01156       }
01157 
01158       // update the solution, use R for storage
01159       {
01160         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01161         // X <- X*S
01162         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01163         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01164         // KX <- KX*S
01165         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01166         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01167         if (hasM_) {
01168           // MX <- MX*S
01169           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01170           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01171         }
01172       }
01173     }
01174 
01175     //----------------------------------------
01176     // compute R
01177     //----------------------------------------
01178     if (newstate.R != Teuchos::null) {
01179       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_),
01180                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01181       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01182                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01183       MVT::SetBlock(*newstate.R,bsind,*R_);
01184     }
01185     else {
01186       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01187       // form R <- KX - MX*T
01188       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01189       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01190       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01191       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01192     }
01193 
01194     // R has been updated; mark the norms as out-of-date
01195     Rnorms_current_ = false;
01196     R2norms_current_ = false;
01197   
01198     // put data in P,KP,MP: P is not used to set theta
01199     if (newstate.P != Teuchos::null) {
01200       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01201                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01202       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_),
01203                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01204       hasP_ = true;
01205 
01206       // set P_
01207       MVT::SetBlock(*newstate.P,bsind,*P_);
01208 
01209       // set/compute KP_
01210       if (newstate.KP != Teuchos::null) {
01211         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01212                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01213         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_),
01214                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01215         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01216       }
01217       else {
01218         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01219         OPT::Apply(*Op_,*P_,*KP_);
01220         count_ApplyOp_ += blockSize_;
01221       }
01222 
01223       // set/compute MP_
01224       if (hasM_) {
01225         if (newstate.MP != Teuchos::null) {
01226           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01227                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01228           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_),
01229                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01230           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01231         }
01232         else {
01233           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01234           OPT::Apply(*MOp_,*P_,*MP_);
01235           count_ApplyM_ += blockSize_;
01236         }
01237       }
01238     }
01239     else {
01240       hasP_ = false;
01241     }
01242 
01243     // finally, we are initialized
01244     initialized_ = true;
01245 
01246     if (om_->isVerbosity( Debug ) ) {
01247       // Check almost everything here
01248       CheckList chk;
01249       chk.checkX = true;
01250       chk.checkKX = true;
01251       chk.checkMX = true;
01252       chk.checkP = true;
01253       chk.checkKP = true;
01254       chk.checkMP = true;
01255       chk.checkR = true;
01256       chk.checkQ = true;
01257       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01258     }
01259 
01260   }
01261 
01262   template <class ScalarType, class MV, class OP>
01263   void LOBPCG<ScalarType,MV,OP>::initialize()
01264   {
01265     LOBPCGState<ScalarType,MV> empty;
01266     initialize(empty);
01267   }
01268 
01269 
01271   // Instruct the solver to use full orthogonalization
01272   template <class ScalarType, class MV, class OP>
01273   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01274   {
01275     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01276       // state is already orthogonalized or solver is not initialized
01277       fullOrtho_ = fullOrtho;
01278     }
01279     else {
01280       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01281       // ergo, we must throw away data in P
01282       fullOrtho_ = true;
01283       hasP_ = false;
01284     }
01285 
01286     // the user has called setFullOrtho, so the class has been instantiated
01287     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01288     // if it is already allocated, it should be the proper size
01289     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01290       // allocated the workspace
01291       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01292     }
01293     else if (fullOrtho_==false) {
01294       // free the workspace
01295       tmpmvec_ = Teuchos::null;
01296     }
01297   }
01298 
01299 
01301   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01302   template <class ScalarType, class MV, class OP>
01303   void LOBPCG<ScalarType,MV,OP>::iterate () 
01304   {
01305     //
01306     // Allocate/initialize data structures
01307     //
01308     if (initialized_ == false) {
01309       initialize();
01310     }
01311 
01312     //
01313     // Miscellaneous definitions
01314     const int oneBlock    =   blockSize_;
01315     const int twoBlocks   = 2*blockSize_;
01316     const int threeBlocks = 3*blockSize_;
01317 
01318     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01319     for (int i=0; i<blockSize_; i++) {
01320       indblock1[i] = i;
01321       indblock2[i] = i +  blockSize_;
01322       indblock3[i] = i + 2*blockSize_;
01323     }
01324 
01325     //
01326     // Define dense projected/local matrices
01327     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01328     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01329     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01330     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01331                                                MM( threeBlocks, threeBlocks ),
01332                                                 S( threeBlocks, threeBlocks );
01333 
01334     while (tester_->checkStatus(this) != Passed) {
01335 
01336       // Print information on current status
01337       if (om_->isVerbosity(Debug)) {
01338         currentStatus( om_->stream(Debug) );
01339       }
01340       else if (om_->isVerbosity(IterationDetails)) {
01341         currentStatus( om_->stream(IterationDetails) );
01342       }
01343 
01344       // increment iteration counter
01345       iter_++;
01346 
01347       // Apply the preconditioner on the residuals: H <- Prec*R
01348       if (Prec_ != Teuchos::null) {
01349         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01350         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01351         count_ApplyPrec_ += blockSize_;
01352       }
01353       else {
01354         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01355       }
01356 
01357       // Apply the mass matrix on H
01358       if (hasM_) {
01359         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01360         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01361         count_ApplyM_ += blockSize_;
01362       }
01363 
01364       // orthogonalize H against the auxiliary vectors
01365       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01366       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01367       Q = auxVecs_;
01368       if (fullOrtho_) {
01369         // X and P are not contiguous, so there is not much point in putting them under 
01370         // a single multivector view
01371         Q.push_back(X_);
01372         if (hasP_) {
01373           Q.push_back(P_);
01374         }
01375       }
01376       {
01377         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01378         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 
01379           Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01380         int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
01381         // our views are currently in place; it is safe to throw an exception
01382         TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01383                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01384       }
01385 
01386       if (om_->isVerbosity( Debug ) ) {
01387         CheckList chk;
01388         chk.checkH = true;
01389         chk.checkMH = true;
01390         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01391       }
01392       else if (om_->isVerbosity( OrthoDetails ) ) {
01393         CheckList chk;
01394         chk.checkH = true;
01395         chk.checkMH = true;
01396         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01397       }
01398 
01399       // Apply the stiffness matrix to H
01400       {
01401         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01402         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01403         count_ApplyOp_ += blockSize_;
01404       }
01405 
01406       if (hasP_) {
01407         nevLocal_ = threeBlocks;
01408       }
01409       else {
01410         nevLocal_ = twoBlocks;
01411       }
01412 
01413       //
01414       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01415       // we need to perform the following operations:
01416       //    X' [KX KH KP]
01417       //    X' [MX MH MP]
01418       //    H' [KH KP]
01419       //    H' [MH MP]
01420       //    P' [KP]
01421       //    P' [MP]
01422       //    [X H P] CX
01423       //    [X H P] CP    if  fullOrtho
01424       //      [H P] CP    if !fullOrtho
01425       //
01426       // since M[X H P] is potentially the same memory as [X H P], and 
01427       // because we are not allowed to have overlapping non-const views of 
01428       // a multivector, we will now abandon our non-const views in favor of 
01429       // const views
01430       //
01431       X_ = Teuchos::null;
01432       KX_ = Teuchos::null;
01433       MX_ = Teuchos::null;
01434       H_ = Teuchos::null;
01435       KH_ = Teuchos::null;
01436       MH_ = Teuchos::null;
01437       P_ = Teuchos::null;
01438       KP_ = Teuchos::null;
01439       MP_ = Teuchos::null;
01440       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01441       {
01442         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01443         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01444 
01445         std::vector<int> indXHP(nevLocal_);
01446         for (int i=0; i<nevLocal_; i++) {
01447           indXHP[i] = i;
01448         }
01449         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01450         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01451         if (hasM_) {
01452           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01453         }
01454         else {
01455           cM_XHP = cXHP;
01456         }
01457 
01458         std::vector<int> indHP(nevLocal_-blockSize_);
01459         for (int i=blockSize_; i<nevLocal_; i++) {
01460           indHP[i-blockSize_] = i;
01461         }
01462         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01463         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01464         if (hasM_) {
01465           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01466         }
01467         else {
01468           cM_HP = cHP;
01469         }
01470 
01471         if (nevLocal_ == threeBlocks) {
01472           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01473           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01474           if (hasM_) {
01475             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01476           }
01477           else {
01478             cM_P = cP;
01479           }
01480         }
01481       }
01482 
01483       //
01484       //----------------------------------------
01485       // Form "local" mass and stiffness matrices
01486       //----------------------------------------
01487       {
01488         // We will form only the block upper triangular part of 
01489         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01490         // Get the necessary views into KK and MM:
01491         //      [--K1--]        [--M1--]
01492         // KK = [  -K2-]   MM = [  -M2-]
01493         //      [    K3]        [    M3]
01494         // 
01495         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01496         //
01497         Teuchos::SerialDenseMatrix<int,ScalarType> 
01498           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01499           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01500           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01501           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01502           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01503           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01504         {
01505           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01506           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01507           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01508           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01509           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01510           if (nevLocal_ == threeBlocks) {
01511             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01512             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01513           }
01514         }
01515       }
01516       // below, we only need bases [X H P] and [H P] and friends
01517       // furthermore, we only need [H P] and friends if fullOrtho == false
01518       // clear the others now
01519       cX   = Teuchos::null;
01520       cH   = Teuchos::null;
01521       cP   = Teuchos::null;
01522       cK_P = Teuchos::null;
01523       cM_P = Teuchos::null;
01524       if (fullOrtho_ == true) {
01525         cHP = Teuchos::null;
01526         cK_HP = Teuchos::null;
01527         cM_HP = Teuchos::null;
01528       }
01529 
01530       //
01531       //---------------------------------------------------
01532       // Perform a spectral decomposition of (KK,MM)
01533       //---------------------------------------------------
01534       //
01535       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01536       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01537                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01538                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01539       {
01540         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01541         int localSize = nevLocal_;
01542         Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
01543         // localSize tells directSolver() how big KK,MM are
01544         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01545         // because of loss of MM-orthogonality in the projected eigenvectors
01546         // nevLocal_ tells us how much it used, telling us the effective localSize 
01547         // (i.e., how much of KK,MM used by directSolver)
01548         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01549         // detected by directSolver
01550         //
01551         if (nevLocal_ != localSize) {
01552           // before throwing the exception, and thereby leaving iterate(), setup the views again
01553           // first, clear the const views
01554           cXHP   = Teuchos::null;
01555           cK_XHP = Teuchos::null;
01556           cM_XHP = Teuchos::null;
01557           cHP    = Teuchos::null;
01558           cK_HP  = Teuchos::null;
01559           cM_HP  = Teuchos::null;
01560           setupViews();
01561         }
01562         TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01563             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01564       }
01565 
01566       //
01567       //---------------------------------------------------
01568       // Sort the ritz values using the sort manager
01569       //---------------------------------------------------
01570       Teuchos::LAPACK<int,ScalarType> lapack;
01571       Teuchos::BLAS<int,ScalarType> blas;
01572       {
01573         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01574 
01575         std::vector<int> order(nevLocal_);
01576         // 
01577         // Sort the first nevLocal_ values in theta_
01578         sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_);   // don't catch exception
01579         //
01580         // Sort the primitive ritz vectors
01581         Utils::permuteVectors(order,lclS);
01582       }
01583 
01584       //
01585       //----------------------------------------
01586       // Compute coefficients for X and P under [X H P]
01587       //----------------------------------------
01588       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01589       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01590       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01591       // Here, we will also orthonormalize CX.
01592       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01593       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01594       if (fullOrtho_) {
01595         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01596         //                             ( S21 )                          ( S21 )
01597         //                             ( S31 )                          ( S31 )
01598         // Do this using Cholesky factorization of ( S11  0  )
01599         //                                         ( S21 S21 )
01600         //                                         ( S31 S31 )
01601         //           ( S11  0  )
01602         // Build C = ( S21 S21 )
01603         //           ( S31 S31 )
01604         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01605                                                 MMC(nevLocal_,twoBlocks),
01606                                                 CMMC(twoBlocks  ,twoBlocks);
01607 
01608         // first block of rows: ( S11 0 )
01609         for (int j=0; j<oneBlock; j++) {
01610           for (int i=0; i<oneBlock; i++) {
01611             // CX
01612             C(i,j) = lclS(i,j);
01613             // CP
01614             C(i,j+oneBlock) = ZERO;
01615           }
01616         }
01617         // second block of rows: (S21 S21)
01618         for (int j=0; j<oneBlock; j++) {
01619           for (int i=oneBlock; i<twoBlocks; i++) {
01620             // CX
01621             C(i,j)          = lclS(i,j);
01622             // CP
01623             C(i,j+oneBlock) = lclS(i,j);
01624           }
01625         }
01626         // third block of rows
01627         if (nevLocal_ == threeBlocks) {
01628           for (int j=0; j<oneBlock; j++) {
01629             for (int i=twoBlocks; i<threeBlocks; i++) {
01630               // CX
01631               C(i,j)          = lclS(i,j);
01632               // CP
01633               C(i,j+oneBlock) = lclS(i,j);
01634             }
01635           }
01636         }
01637 
01638         // compute MMC = lclMM*C
01639         {
01640           int teuchosret;
01641           teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01642           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01643               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01644           // compute CMMC = C^H*MMC == C^H*lclMM*C
01645           teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
01646           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01647               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01648         }
01649 
01650         // compute R (cholesky) of CMMC
01651         {
01652           int info;
01653           lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
01654           // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01655           if (info != 0) {
01656             cXHP = Teuchos::null;
01657             cHP = Teuchos::null;
01658             cK_XHP = Teuchos::null;
01659             cK_HP = Teuchos::null;
01660             cM_XHP = Teuchos::null;
01661             cM_HP = Teuchos::null;
01662             setupViews();
01663           }
01664           TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 
01665               "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.");
01666         }
01667         // compute C = C inv(R)
01668         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01669                   nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
01670         // put C(:,0:oneBlock-1) into CX
01671         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01672         // put C(:,oneBlock:twoBlocks-1) into CP
01673         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01674 
01675         // check the results
01676         if (om_->isVerbosity( Debug ) ) {
01677           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01678                                                      tmp2(oneBlock,oneBlock);
01679           MagnitudeType tmp;
01680           int teuchosret;
01681           std::stringstream os;
01682           os.precision(2);
01683           os.setf(std::ios::scientific, std::ios::floatfield);
01684 
01685           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01686 
01687           // check CX^T MM CX == I
01688           // compute tmp1 = MM*CX
01689           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01690           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01691               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01692           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01693           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01694           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01695               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01696           // subtrace tmp2 - I == CX^H * MM * CX - I
01697           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01698           tmp = tmp2.normFrobenius();          
01699           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01700 
01701           // check CP^T MM CP == I
01702           // compute tmp1 = MM*CP
01703           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01704           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01705               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01706           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01707           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01708           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01709               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01710           // subtrace tmp2 - I == CP^H * MM * CP - I
01711           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01712           tmp = tmp2.normFrobenius();          
01713           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01714 
01715           // check CX^T MM CP == 0
01716           // compute tmp1 = MM*CP
01717           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01718           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01719           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01720           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01721           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01722           // subtrace tmp2 == CX^H * MM * CP
01723           tmp = tmp2.normFrobenius();          
01724           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01725 
01726           os << std::endl;
01727           om_->print(Debug,os.str());
01728         }
01729       }
01730       else {
01731         //     [S11 ... ...]
01732         // S = [S21 ... ...]
01733         //     [S31 ... ...]
01734         //
01735         // CX = [S11]
01736         //      [S21]
01737         //      [S31]   ->  X = [X H P] CX
01738         //      
01739         // CP = [S21]   ->  P =   [H P] CP
01740         //      [S31]
01741         //
01742         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01743         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01744       }
01745 
01746       //
01747       //----------------------------------------
01748       // Compute new X and new P
01749       //----------------------------------------
01750       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01751       {
01752         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01753 
01754         // if full ortho, then CX and CP are dense
01755         // we multiply [X H P]*CX into tmpMV
01756         //             [X H P]*CP into R
01757         // then put V(:,firstblock) <- tmpMV
01758         //          V(:,thirdblock) <- R
01759         //
01760         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01761         // of V, so that we can modify it before computing P
01762         // so we multiply [X H P]*CX into R
01763         //                V(:,firstblock) <- R
01764         //       multiply [H P]*CP into R
01765         //                V(:,thirdblock) <- R
01766         //
01767         // mutatis mutandis for K[XP] and M[XP]
01768         //
01769         // use SetBlock to do the assignments into V_
01770         //
01771         // in either case, views are only allowed to be overlapping
01772         // if they are const, and it should be assume that SetBlock
01773         // creates a view of the associated part
01774         //
01775         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01776         //
01777         if (fullOrtho_) {
01778           // X,P
01779           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01780           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01781           cXHP = Teuchos::null;
01782           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01783           MVT::SetBlock(*R_      ,indblock3,*V_);
01784           // KX,KP
01785           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01786           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01787           cK_XHP = Teuchos::null;
01788           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01789           MVT::SetBlock(*R_      ,indblock3,*KV_);
01790           // MX,MP
01791           if (hasM_) {
01792             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01793             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01794             cM_XHP = Teuchos::null;
01795             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01796             MVT::SetBlock(*R_      ,indblock3,*MV_);
01797           }
01798           else {
01799             cM_XHP = Teuchos::null;
01800           }
01801         }
01802         else {
01803           // X,P
01804           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01805           cXHP = Teuchos::null;
01806           MVT::SetBlock(*R_,indblock1,*V_);
01807           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01808           cHP = Teuchos::null;
01809           MVT::SetBlock(*R_,indblock3,*V_);
01810           // KX,KP
01811           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01812           cK_XHP = Teuchos::null;
01813           MVT::SetBlock(*R_,indblock1,*KV_);
01814           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01815           cK_HP = Teuchos::null;
01816           MVT::SetBlock(*R_,indblock3,*KV_);
01817           // MX,MP
01818           if (hasM_) {
01819             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01820             cM_XHP = Teuchos::null;
01821             MVT::SetBlock(*R_,indblock1,*MV_);
01822             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01823             cM_HP = Teuchos::null;
01824             MVT::SetBlock(*R_,indblock3,*MV_);
01825           }
01826           else {
01827             cM_XHP = Teuchos::null;
01828             cM_HP = Teuchos::null;
01829           }
01830         }
01831       } // end timing block
01832       // done with coefficient matrices
01833       CX = Teuchos::null;
01834       CP = Teuchos::null;
01835 
01836       //
01837       // we now have a P direction
01838       hasP_ = true;
01839 
01840       // debugging check: all of our const views should have been cleared by now
01841       // if not, we have a logic error above
01842       TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01843                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01844                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01845                           std::logic_error,
01846                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01847 
01848       //
01849       // recreate our const MV views of X,H,P and friends
01850       setupViews();
01851 
01852       //
01853       // Compute the new residuals, explicitly
01854       {
01855         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01856         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01857         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01858         for (int i = 0; i < blockSize_; i++) {
01859           T(i,i) = theta_[i];
01860         }
01861         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01862       }
01863 
01864       // R has been updated; mark the norms as out-of-date
01865       Rnorms_current_ = false;
01866       R2norms_current_ = false;
01867 
01868       // When required, monitor some orthogonalities
01869       if (om_->isVerbosity( Debug ) ) {
01870         // Check almost everything here
01871         CheckList chk;
01872         chk.checkX = true;
01873         chk.checkKX = true;
01874         chk.checkMX = true;
01875         chk.checkP = true;
01876         chk.checkKP = true;
01877         chk.checkMP = true;
01878         chk.checkR = true;
01879         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01880       }
01881       else if (om_->isVerbosity( OrthoDetails )) {
01882         CheckList chk;
01883         chk.checkX = true;
01884         chk.checkP = true;
01885         chk.checkR = true;
01886         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01887       }
01888     } // end while (statusTest == false)
01889   }
01890 
01891 
01893   // compute/return residual M-norms
01894   template <class ScalarType, class MV, class OP>
01895   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01896   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01897     if (Rnorms_current_ == false) {
01898       // Update the residual norms
01899       orthman_->norm(*R_,Rnorms_);
01900       Rnorms_current_ = true;
01901     }
01902     return Rnorms_;
01903   }
01904 
01905   
01907   // compute/return residual 2-norms
01908   template <class ScalarType, class MV, class OP>
01909   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01910   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01911     if (R2norms_current_ == false) {
01912       // Update the residual 2-norms 
01913       MVT::MvNorm(*R_,R2norms_);
01914       R2norms_current_ = true;
01915     }
01916     return R2norms_;
01917   }
01918 
01919 
01920 
01921 
01923   // Check accuracy, orthogonality, and other debugging stuff
01924   // 
01925   // bools specify which tests we want to run (instead of running more than we actually care about)
01926   //
01927   // we don't bother checking the following because they are computed explicitly:
01928   //    H == Prec*R
01929   //   KH == K*H
01930   //
01931   // 
01932   // checkX : X orthonormal
01933   //          orthogonal to auxvecs
01934   // checkMX: check MX == M*X
01935   // checkKX: check KX == K*X
01936   // checkP : if fullortho P orthonormal and orthogonal to X
01937   //          orthogonal to auxvecs
01938   // checkMP: check MP == M*P
01939   // checkKP: check KP == K*P
01940   // checkH : if fullortho H orthonormal and orthogonal to X and P
01941   //          orthogonal to auxvecs
01942   // checkMH: check MH == M*H
01943   // checkR : check R orthogonal to X
01944   // checkQ : check that auxiliary vectors are actually orthonormal
01945   //
01946   // TODO: 
01947   //  add checkTheta 
01948   //
01949   template <class ScalarType, class MV, class OP>
01950   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
01951   {
01952     using std::endl;
01953 
01954     std::stringstream os;
01955     os.precision(2);
01956     os.setf(std::ios::scientific, std::ios::floatfield);
01957     MagnitudeType tmp;
01958 
01959     os << " Debugging checks: iteration " << iter_ << where << endl;
01960 
01961     // X and friends
01962     if (chk.checkX && initialized_) {
01963       tmp = orthman_->orthonormError(*X_);
01964       os << " >> Error in X^H M X == I : " << tmp << endl;
01965       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
01966         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
01967         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
01968       }
01969     }
01970     if (chk.checkMX && hasM_ && initialized_) {
01971       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
01972       os << " >> Error in MX == M*X    : " << tmp << endl;
01973     }
01974     if (chk.checkKX && initialized_) {
01975       tmp = Utils::errorEquality(*X_, *KX_, Op_);
01976       os << " >> Error in KX == K*X    : " << tmp << endl;
01977     }
01978 
01979     // P and friends
01980     if (chk.checkP && hasP_ && initialized_) {
01981       if (fullOrtho_) {
01982         tmp = orthman_->orthonormError(*P_);
01983         os << " >> Error in P^H M P == I : " << tmp << endl;
01984         tmp = orthman_->orthogError(*P_,*X_);
01985         os << " >> Error in P^H M X == 0 : " << tmp << endl;
01986       }
01987       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
01988         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
01989         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
01990       }
01991     }
01992     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
01993       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
01994       os << " >> Error in MP == M*P    : " << tmp << endl;
01995     }
01996     if (chk.checkKP && hasP_ && initialized_) {
01997       tmp = Utils::errorEquality(*P_, *KP_, Op_);
01998       os << " >> Error in KP == K*P    : " << tmp << endl;
01999     }
02000 
02001     // H and friends
02002     if (chk.checkH && initialized_) {
02003       if (fullOrtho_) {
02004         tmp = orthman_->orthonormError(*H_);
02005         os << " >> Error in H^H M H == I : " << tmp << endl;
02006         tmp = orthman_->orthogError(*H_,*X_);
02007         os << " >> Error in H^H M X == 0 : " << tmp << endl;
02008         if (hasP_) {
02009           tmp = orthman_->orthogError(*H_,*P_);
02010           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02011         }
02012       }
02013       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02014         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02015         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02016       }
02017     }
02018     if (chk.checkMH && hasM_ && initialized_) {
02019       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02020       os << " >> Error in MH == M*H    : " << tmp << endl;
02021     }
02022 
02023     // R: this is not M-orthogonality, but standard euclidean orthogonality
02024     if (chk.checkR && initialized_) {
02025       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02026       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02027       tmp = xTx.normFrobenius();
02028       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02029       double normR = xTx.normFrobenius();
02030       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02031     }
02032 
02033     // Q
02034     if (chk.checkQ) {
02035       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02036         tmp = orthman_->orthonormError(*auxVecs_[i]);
02037         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02038         for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
02039           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02040           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02041         }
02042       }
02043     }
02044 
02045     os << endl;
02046 
02047     return os.str();
02048   }
02049 
02050 
02052   // Print the current status of the solver
02053   template <class ScalarType, class MV, class OP>
02054   void 
02055   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02056   {
02057     using std::endl;
02058 
02059     os.setf(std::ios::scientific, std::ios::floatfield);  
02060     os.precision(6);
02061     os <<endl;
02062     os <<"================================================================================" << endl;
02063     os << endl;
02064     os <<"                              LOBPCG Solver Status" << endl;
02065     os << endl;
02066     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02067     os <<"The number of iterations performed is " << iter_       << endl;
02068     os <<"The current block size is             " << blockSize_  << endl;
02069     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02070     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02071     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02072     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02073 
02074     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02075 
02076     if (initialized_) {
02077       os << endl;
02078       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02079       os << std::setw(20) << "Eigenvalue" 
02080          << std::setw(20) << "Residual(M)"
02081          << std::setw(20) << "Residual(2)"
02082          << endl;
02083       os <<"--------------------------------------------------------------------------------"<<endl;
02084       for (int i=0; i<blockSize_; i++) {
02085         os << std::setw(20) << theta_[i];
02086         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02087         else os << std::setw(20) << "not current";
02088         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02089         else os << std::setw(20) << "not current";
02090         os << endl;
02091       }
02092     }
02093     os <<"================================================================================" << endl;
02094     os << endl;
02095   }
02096 
02098   // are we initialized or not?
02099   template <class ScalarType, class MV, class OP>
02100   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02101     return initialized_; 
02102   }
02103 
02104 
02106   // is P valid or not?
02107   template <class ScalarType, class MV, class OP>
02108   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02109     return hasP_;
02110   }
02111   
02113   // is full orthogonalization enabled or not?
02114   template <class ScalarType, class MV, class OP>
02115   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02116     return(fullOrtho_); 
02117   }
02118 
02119   
02121   // return the current auxilliary vectors
02122   template <class ScalarType, class MV, class OP>
02123   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02124     return auxVecs_;
02125   }
02126 
02128   // return the current block size
02129   template <class ScalarType, class MV, class OP>
02130   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02131     return(blockSize_); 
02132   }
02133 
02135   // return the current eigenproblem
02136   template <class ScalarType, class MV, class OP>
02137   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02138     return(*problem_); 
02139   }
02140 
02141   
02143   // return the max subspace dimension
02144   template <class ScalarType, class MV, class OP>
02145   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02146     return 3*blockSize_;
02147   }
02148 
02150   // return the current subspace dimension
02151   template <class ScalarType, class MV, class OP>
02152   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02153     if (!initialized_) return 0;
02154     return nevLocal_;
02155   }
02156 
02157   
02159   // return the current ritz residual norms
02160   template <class ScalarType, class MV, class OP>
02161   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02162   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02163   {
02164     return this->getRes2Norms();
02165   }
02166 
02167   
02169   // return the current compression indices
02170   template <class ScalarType, class MV, class OP>
02171   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02172     std::vector<int> ret(nevLocal_,0);
02173     return ret;
02174   }
02175 
02176   
02178   // return the current ritz values
02179   template <class ScalarType, class MV, class OP>
02180   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02181     std::vector<Value<ScalarType> > ret(nevLocal_);
02182     for (int i=0; i<nevLocal_; i++) {
02183       ret[i].realpart = theta_[i];
02184       ret[i].imagpart = ZERO;
02185     }
02186     return ret;
02187   }
02188 
02190   // Set a new StatusTest for the solver.
02191   template <class ScalarType, class MV, class OP>
02192   void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
02193     TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
02194         "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
02195     tester_ = test;
02196   }
02197 
02199   // Get the current StatusTest used by the solver.
02200   template <class ScalarType, class MV, class OP>
02201   Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
02202     return tester_;
02203   }
02204   
02206   // return the current ritz vectors
02207   template <class ScalarType, class MV, class OP>
02208   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02209     return X_;
02210   }
02211 
02212   
02214   // reset the iteration counter
02215   template <class ScalarType, class MV, class OP>
02216   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02217     iter_=0; 
02218   }
02219 
02220   
02222   // return the number of iterations
02223   template <class ScalarType, class MV, class OP>
02224   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02225     return(iter_); 
02226   }
02227 
02228   
02230   // return the state
02231   template <class ScalarType, class MV, class OP>
02232   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02233     LOBPCGState<ScalarType,MV> state;
02234     state.V = V_;
02235     state.KV = KV_;
02236     state.X = X_;
02237     state.KX = KX_;
02238     state.P = P_;
02239     state.KP = KP_;
02240     state.H = H_;
02241     state.KH = KH_;
02242     state.R = R_;
02243     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02244     if (hasM_) {
02245       state.MV = MV_;
02246       state.MX = MX_;
02247       state.MP = MP_;
02248       state.MH = MH_;
02249     }
02250     else {
02251       state.MX = Teuchos::null;
02252       state.MP = Teuchos::null;
02253       state.MH = Teuchos::null;
02254     }
02255     return state;
02256   }
02257 
02258 } // end Anasazi namespace
02259 
02260 #endif // ANASAZI_LOBPCG_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends
Generated on Wed Apr 13 09:56:58 2011 for Anasazi by  doxygen 1.6.3