AnasaziLOBPCG.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00415     void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
00416 
00418     Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
00419 
00421     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00422 
00423 
00432     void setBlockSize(int blockSize);
00433 
00434 
00436     int getBlockSize() const;
00437 
00438 
00450     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00451 
00453     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00454 
00456 
00458 
00459 
00465     void setFullOrtho(bool fullOrtho);
00466 
00468     bool getFullOrtho() const;
00469     
00471     bool hasP();
00472 
00474     
00476 
00477 
00479     void currentStatus(std::ostream &os);
00480 
00482 
00483   private:
00484     //
00485     //
00486     //
00487     void setupViews();
00488     //
00489     // Convenience typedefs
00490     //
00491     typedef SolverUtils<ScalarType,MV,OP> Utils;
00492     typedef MultiVecTraits<ScalarType,MV> MVT;
00493     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00494     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00495     typedef typename SCT::magnitudeType MagnitudeType;
00496     const MagnitudeType ONE;  
00497     const MagnitudeType ZERO; 
00498     const MagnitudeType NANVAL;
00499     //
00500     // Internal structs
00501     //
00502     struct CheckList {
00503       bool checkX, checkMX, checkKX;
00504       bool checkH, checkMH;
00505       bool checkP, checkMP, checkKP;
00506       bool checkR, checkQ;
00507       CheckList() : checkX(false),checkMX(false),checkKX(false),
00508                     checkH(false),checkMH(false),
00509                     checkP(false),checkMP(false),checkKP(false),
00510                     checkR(false),checkQ(false) {};
00511     };
00512     //
00513     // Internal methods
00514     //
00515     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00516     //
00517     // Classes inputed through constructor that define the eigenproblem to be solved.
00518     //
00519     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00520     const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
00521     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00522     Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00523     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00524     //
00525     // Information obtained from the eigenproblem
00526     //
00527     Teuchos::RCP<const OP> Op_;
00528     Teuchos::RCP<const OP> MOp_;
00529     Teuchos::RCP<const OP> Prec_;
00530     bool hasM_;
00531     //
00532     // Internal timers
00533     //
00534     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00535                                 timerSort_, 
00536                                 timerLocalProj_, timerDS_,
00537                                 timerLocalUpdate_, timerCompRes_,
00538                                 timerOrtho_, timerInit_;
00539     //
00540     // Counters
00541     //
00542     // Number of operator applications
00543     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00544 
00545     //
00546     // Algorithmic parameters.
00547     //
00548     // blockSize_ is the solver block size
00549     int blockSize_;
00550     //
00551     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00552     // be activated (see citations at the top of this file)
00553     bool fullOrtho_;
00554 
00555     //
00556     // Current solver state
00557     //
00558     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00559     // is capable of running; _initialize is controlled  by the initialize() member method
00560     // For the implications of the state of initialized_, please see documentation for initialize()
00561     bool initialized_;
00562     //
00563     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00564     // this tells us how many of the values in theta_ are valid Ritz values
00565     int nevLocal_;
00566     //
00567     // hasP_ tells us whether there is valid data in P (and KP,MP)
00568     bool hasP_;
00569     //
00570     // State Multivecs
00571     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00572     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00573     // the rest are multivector views into V_, KV_ and MV_
00574     Teuchos::RCP<MV> X_, KX_, MX_,
00575                      H_, KH_, MH_,
00576                      P_, KP_, MP_;
00577 
00578     //
00579     // if fullOrtho_ == true, then we must produce the following on every iteration:
00580     // [newX newP] = [X H P] [CX;CP]
00581     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00582     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00583     // we must allocate additional storage for this.
00584     // otherwise, when not using full orthogonalization, the structure
00585     // [newX newP] = [X H P] [CX1  0 ]
00586     //                       [CX2 CP2]  allows us to work using only R as work space
00587     //                       [CX3 CP3] 
00588     Teuchos::RCP<MV> tmpmvec_;        
00589     // 
00590     // auxiliary vectors
00591     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00592     int numAuxVecs_;
00593     //
00594     // Number of iterations that have been performed.
00595     int iter_;
00596     // 
00597     // Current eigenvalues, residual norms
00598     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00599     // 
00600     // are the residual norms current with the residual?
00601     bool Rnorms_current_, R2norms_current_;
00602 
00603   };
00604 
00605 
00606 
00607 
00609   // Constructor
00610   template <class ScalarType, class MV, class OP>
00611   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00612         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00613         const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00614         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00615         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00616         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00617         Teuchos::ParameterList &params
00618         ) :
00619     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00620     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00621     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00622     // problem, tools
00623     problem_(problem), 
00624     sm_(sorter),
00625     om_(printer),
00626     tester_(tester),
00627     orthman_(ortho),
00628     // timers, counters
00629     timerOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Op*x")),
00630     timerMOp_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation M*x")),
00631     timerPrec_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Operation Prec*x")),
00632     timerSort_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Sorting eigenvalues")),
00633     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local projection")),
00634     timerDS_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Direct solve")),
00635     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Local update")),
00636     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Computing residuals")),
00637     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Orthogonalization")),
00638     timerInit_(Teuchos::TimeMonitor::getNewTimer("LOBPCG::Initialization")),
00639     count_ApplyOp_(0),
00640     count_ApplyM_(0),
00641     count_ApplyPrec_(0),
00642     // internal data
00643     blockSize_(0),
00644     fullOrtho_(params.get("Full Ortho", true)),
00645     initialized_(false),
00646     nevLocal_(0),
00647     hasP_(false),
00648     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00649     numAuxVecs_(0),
00650     iter_(0),
00651     Rnorms_current_(false),
00652     R2norms_current_(false)
00653   {     
00654     TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00655                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00656     TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00657                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00658     TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00659                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00660     TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00661                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00662     TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00663                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00664     TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00665                        "Anasazi::LOBPCG::constructor: problem is not set.");
00666     TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00668 
00669     // get the problem operators
00670     Op_   = problem_->getOperator();
00671     TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00672                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00673     MOp_  = problem_->getM();
00674     Prec_ = problem_->getPrec();
00675     hasM_ = (MOp_ != Teuchos::null);
00676 
00677     // set the block size and allocate data
00678     int bs = params.get("Block Size", problem_->getNEV());
00679     setBlockSize(bs);
00680   }
00681 
00682 
00684   // Set the block size and make necessary adjustments.
00685   template <class ScalarType, class MV, class OP>
00686   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00687   {
00688     // time spent here counts towards timerInit_
00689     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00690 
00691     // This routine only allocates space; it doesn't not perform any computation
00692     // if size is decreased, take the first newBS vectors of all and leave state as is
00693     // otherwise, grow/allocate space and set solver to unitialized
00694 
00695     Teuchos::RCP<const MV> tmp;
00696     // grab some Multivector to Clone
00697     // in practice, getInitVec() should always provide this, but it is possible to use a 
00698     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00699     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00700     // than V_, and we don't want to keep V_ around longer than necessary
00701     if (blockSize_ > 0) {
00702       tmp = R_;
00703     }
00704     else {
00705       tmp = problem_->getInitVec();
00706       TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00707                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00708     }
00709     
00710     TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00711     if (newBS == blockSize_) {
00712       // do nothing
00713       return;
00714     }
00715     else if (newBS < blockSize_ && initialized_) {
00716       //
00717       // shrink vectors
00718 
00719       // release views so we can modify the bases
00720       X_ = Teuchos::null;
00721       KX_ = Teuchos::null;
00722       MX_ = Teuchos::null;
00723       H_ = Teuchos::null;
00724       KH_ = Teuchos::null;
00725       MH_ = Teuchos::null;
00726       P_ = Teuchos::null;
00727       KP_ = Teuchos::null;
00728       MP_ = Teuchos::null;
00729 
00730       // make new indices vectors
00731       std::vector<int> newind(newBS), oldind(newBS);
00732       for (int i=0; i<newBS; i++) {
00733         newind[i] = i;
00734         oldind[i] = i;
00735       }
00736 
00737       Teuchos::RCP<MV> newV, newMV, newKV, newR, src;
00738       // allocate R and newV
00739       newR = MVT::Clone(*tmp,newBS);
00740       newV = MVT::Clone(*tmp,newBS*3);
00741       newKV = MVT::Clone(*tmp,newBS*3);
00742       if (hasM_) {
00743         newMV = MVT::Clone(*tmp,newBS*3);
00744       }
00745 
00746       //
00747       // if we are initialized, we want to pull the data from V_ into newV:
00748       //           bs  |  bs  | bs 
00749       // newV  = [newX | **** |newP ]
00750       // newKV = [newKX| **** |newKP]
00751       // newMV = [newMX| **** |newMP]
00752       // where 
00753       //          oldbs   |  oldbs  |   oldbs   
00754       //  V_ = [newX  *** | ******* | newP  ***]
00755       // KV_ = [newKX *** | ******* | newKP ***]
00756       // MV_ = [newMX *** | ******* | newMP ***]
00757       //
00758       // we don't care to copy the data corresponding to H
00759       // we will not copy the M data if !hasM_, because it doesn't exist
00760       //
00761 
00762       // these are shrink operations which preserve their data
00763       theta_.resize(3*newBS);
00764       Rnorms_.resize(newBS);
00765       R2norms_.resize(newBS);
00766 
00767       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00768       src = MVT::CloneView(*R_,newind);
00769       MVT::SetBlock(*src,newind,*newR);
00770       // free old memory and point to new memory
00771       R_ = newR;
00772 
00773       // copy in order: newX newKX newMX, then newP newKP newMP
00774       // for  X: [0,bs-1] <-- [0,bs-1] 
00775       src = MVT::CloneView(*V_,oldind);
00776       MVT::SetBlock(*src,newind,*newV);
00777       src = MVT::CloneView(*KV_,oldind);
00778       MVT::SetBlock(*src,newind,*newKV);
00779       if (hasM_) {
00780         src = MVT::CloneView(*MV_,oldind);
00781         MVT::SetBlock(*src,newind,*newMV);
00782       }
00783       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00784       for (int i=0; i<newBS; i++) {
00785         newind[i] += 2*newBS;
00786         oldind[i] += 2*blockSize_;
00787       }
00788       src = MVT::CloneView(*V_,oldind);
00789       MVT::SetBlock(*src,newind,*newV);
00790       src = MVT::CloneView(*KV_,oldind);
00791       MVT::SetBlock(*src,newind,*newKV);
00792       if (hasM_) {
00793         src = MVT::CloneView(*MV_,oldind);
00794         MVT::SetBlock(*src,newind,*newMV);
00795       }
00796 
00797       // release temp view
00798       src = Teuchos::null;
00799 
00800       // release old allocations and point at new memory
00801       V_ = newV;
00802       KV_ = newKV;
00803       if (hasM_) {
00804         MV_ = newMV;
00805       }
00806       else {
00807         MV_ = V_;
00808       }
00809     }
00810     else {  
00811       // newBS > blockSize_  or  not initialized
00812       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00813       initialized_ = false;
00814       hasP_ = false;
00815 
00816       // release views
00817       X_ = Teuchos::null;
00818       KX_ = Teuchos::null;
00819       MX_ = Teuchos::null;
00820       H_ = Teuchos::null;
00821       KH_ = Teuchos::null;
00822       MH_ = Teuchos::null;
00823       P_ = Teuchos::null;
00824       KP_ = Teuchos::null;
00825       MP_ = Teuchos::null;
00826 
00827       // free allocated storage
00828       R_ = Teuchos::null;
00829       V_ = Teuchos::null;
00830 
00831       // allocate scalar vectors
00832       theta_.resize(3*newBS,NANVAL);
00833       Rnorms_.resize(newBS,NANVAL);
00834       R2norms_.resize(newBS,NANVAL);
00835       
00836       // clone multivectors off of tmp
00837       R_ = MVT::Clone(*tmp,newBS);
00838       V_ = MVT::Clone(*tmp,newBS*3);
00839       KV_ = MVT::Clone(*tmp,newBS*3);
00840       if (hasM_) {
00841         MV_ = MVT::Clone(*tmp,newBS*3);
00842       }
00843       else {
00844         MV_ = V_;
00845       }
00846     }
00847 
00848     // allocate tmp space
00849     tmpmvec_ = Teuchos::null;
00850     if (fullOrtho_) {
00851       tmpmvec_ = MVT::Clone(*tmp,newBS);
00852     }
00853 
00854     // set new block size
00855     blockSize_ = newBS;
00856 
00857     // setup new views
00858     setupViews();
00859   }
00860 
00861 
00863   // Setup views into V,KV,MV
00864   template <class ScalarType, class MV, class OP>
00865   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00866   {
00867     std::vector<int> ind(blockSize_);
00868 
00869     for (int i=0; i<blockSize_; i++) {
00870       ind[i] = i;
00871     }
00872     X_  = MVT::CloneView(*V_,ind);
00873     KX_ = MVT::CloneView(*KV_,ind);
00874     if (hasM_) {
00875       MX_ = MVT::CloneView(*MV_,ind);
00876     }
00877     else {
00878       MX_ = X_;
00879     }
00880 
00881     for (int i=0; i<blockSize_; i++) {
00882       ind[i] += blockSize_;
00883     }
00884     H_  = MVT::CloneView(*V_,ind);
00885     KH_ = MVT::CloneView(*KV_,ind);
00886     if (hasM_) {
00887       MH_ = MVT::CloneView(*MV_,ind);
00888     }
00889     else {
00890       MH_ = H_;
00891     }
00892 
00893     for (int i=0; i<blockSize_; i++) {
00894       ind[i] += blockSize_;
00895     }
00896     P_  = MVT::CloneView(*V_,ind);
00897     KP_ = MVT::CloneView(*KV_,ind);
00898     if (hasM_) {
00899       MP_ = MVT::CloneView(*MV_,ind);
00900     }
00901     else {
00902       MP_ = P_;
00903     }
00904   }
00905 
00906 
00908   // Set the auxiliary vectors
00909   template <class ScalarType, class MV, class OP>
00910   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00911     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00912 
00913     // set new auxiliary vectors
00914     auxVecs_ = auxvecs;
00915     
00916     numAuxVecs_ = 0;
00917     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00918       numAuxVecs_ += MVT::GetNumberVecs(**i);
00919     }
00920     
00921     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00922     if (numAuxVecs_ > 0 && initialized_) {
00923       initialized_ = false;
00924       hasP_ = false;
00925     }
00926 
00927     if (om_->isVerbosity( Debug ) ) {
00928       // Check almost everything here
00929       CheckList chk;
00930       chk.checkQ = true;
00931       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00932     }
00933   }
00934 
00935 
00937   /* Initialize the state of the solver
00938    * 
00939    * POST-CONDITIONS:
00940    *
00941    * initialized_ == true
00942    * X is orthonormal, orthogonal to auxVecs_
00943    * KX = Op*X
00944    * MX = M*X if hasM_
00945    * theta_ contains Ritz values of X
00946    * R = KX - MX*diag(theta_)
00947    * if hasP() == true,
00948    *   P orthogonal to auxVecs_
00949    *   if fullOrtho_ == true,
00950    *     P orthonormal and orthogonal to X
00951    *   KP = Op*P
00952    *   MP = M*P
00953    */
00954   template <class ScalarType, class MV, class OP>
00955   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00956   {
00957     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00958     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00959 
00960     Teuchos::TimeMonitor inittimer( *timerInit_ );
00961 
00962     std::vector<int> bsind(blockSize_);
00963     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00964 
00965     // in LOBPCG, X (the subspace iterate) is primary
00966     // the order of dependence follows like so.
00967     // --init->                 X
00968     //    --op apply->          MX,KX
00969     //       --ritz analysis->  theta
00970     //          --optional->    P,MP,KP
00971     // 
00972     // if the user specifies all data for a level, we will accept it.
00973     // otherwise, we will generate the whole level, and all subsequent levels.
00974     //
00975     // the data members are ordered based on dependence, and the levels are
00976     // partitioned according to the amount of work required to produce the
00977     // items in a level.
00978     //
00979     // inconsitent multivectors widths and lengths will not be tolerated, and
00980     // will be treated with exceptions.
00981 
00982     // set up X, KX, MX: get them from "state" if user specified them
00983 
00984     //----------------------------------------
00985     // set up X, MX, KX
00986     //----------------------------------------
00987     if (newstate.X != Teuchos::null) {
00988       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_),
00989                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00990       // newstate.X must have blockSize_ vectors; any more will be ignored
00991       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
00992                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
00993 
00994       // put X data in X_
00995       MVT::SetBlock(*newstate.X,bsind,*X_);
00996 
00997       // put MX data in MX_
00998       if (hasM_) {
00999         if (newstate.MX != Teuchos::null) {
01000           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_),
01001                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
01002           // newstate.MX must have blockSize_ vectors; any more will be ignored
01003           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
01004                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01005           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01006         }
01007         else {
01008           // user didn't specify MX, compute it
01009           {
01010             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01011             OPT::Apply(*MOp_,*X_,*MX_);
01012             count_ApplyM_ += blockSize_;
01013           }
01014           // we generated MX; we will generate R as well
01015           newstate.R = Teuchos::null;
01016         }
01017       }
01018   
01019       // put data in KX
01020       if (newstate.KX != Teuchos::null) {
01021         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_),
01022                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01023         // newstate.KX must have blockSize_ vectors; any more will be ignored
01024         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01025                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01026         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01027       }
01028       else {
01029         // user didn't specify KX, compute it
01030         {
01031           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01032           OPT::Apply(*Op_,*X_,*KX_);
01033           count_ApplyOp_ += blockSize_;
01034         }
01035         // we generated KX; we will generate R as well
01036         newstate.R = Teuchos::null;
01037       }
01038     }
01039     else {
01040       // user did not specify X
01041       // we will initialize X, compute KX and MX, and compute R
01042       //
01043       // clear state so we won't use any data from it below
01044       newstate.P  = Teuchos::null;
01045       newstate.KP = Teuchos::null;
01046       newstate.MP = Teuchos::null;
01047       newstate.R  = Teuchos::null;
01048       newstate.T  = Teuchos::null;
01049 
01050       // generate a basis and projectAndNormalize
01051       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01052       TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01053                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01054 
01055       int initSize = MVT::GetNumberVecs(*ivec);
01056       if (initSize > blockSize_) {
01057         // we need only the first blockSize_ vectors from ivec; get a view of them
01058         initSize = blockSize_;
01059         std::vector<int> ind(blockSize_);
01060         for (int i=0; i<blockSize_; i++) ind[i] = i;
01061         ivec = MVT::CloneView(*ivec,ind);
01062       }
01063 
01064       // assign ivec to first part of X_ 
01065       if (initSize > 0) {
01066         std::vector<int> ind(initSize);
01067         for (int i=0; i<initSize; i++) ind[i] = i;
01068         MVT::SetBlock(*ivec,ind,*X_);
01069       }
01070       // fill the rest of X_ with random
01071       if (blockSize_ > initSize) {
01072         std::vector<int> ind(blockSize_ - initSize);
01073         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01074         Teuchos::RCP<MV> rX = MVT::CloneView(*X_,ind);
01075         MVT::MvRandom(*rX);
01076         rX = Teuchos::null;
01077       }
01078 
01079       // put data in MX
01080       if (hasM_) {
01081         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01082         OPT::Apply(*MOp_,*X_,*MX_);
01083         count_ApplyM_ += blockSize_;
01084       }
01085   
01086       // remove auxVecs from X_ and normalize it
01087       if (numAuxVecs_ > 0) {
01088         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01089         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01090         int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
01091         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01092                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01093       }
01094       else {
01095         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01096         int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
01097         TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01098                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01099       }
01100 
01101       // put data in KX
01102       {
01103         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01104         OPT::Apply(*Op_,*X_,*KX_);
01105         count_ApplyOp_ += blockSize_;
01106       }
01107     } // end if (newstate.X != Teuchos::null)
01108 
01109 
01110     //----------------------------------------
01111     // set up Ritz values
01112     //----------------------------------------
01113     theta_.resize(3*blockSize_,NANVAL);
01114     if (newstate.T != Teuchos::null) {
01115       TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01116                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01117       for (int i=0; i<blockSize_; i++) {
01118         theta_[i] = (*newstate.T)[i];
01119       }
01120     }
01121     else {
01122       // get ritz vecs/vals
01123       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01124                                                  MM(blockSize_,blockSize_),
01125                                                   S(blockSize_,blockSize_);
01126       {
01127         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01128         // project K
01129         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01130         // project M
01131         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01132         nevLocal_ = blockSize_;
01133       }
01134 
01135       // solve the projected problem
01136       {
01137         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01138         Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
01139         TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01140                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01141       }
01142 
01143       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01144       // However, we still require them to be ordered correctly
01145       {
01146         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01147 
01148         std::vector<int> order(blockSize_);
01149         // 
01150         // sort the first blockSize_ values in theta_
01151         sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_);   // don't catch exception
01152         //
01153         // apply the same ordering to the primitive ritz vectors
01154         Utils::permuteVectors(order,S);
01155       }
01156 
01157       // update the solution, use R for storage
01158       {
01159         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01160         // X <- X*S
01161         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01162         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01163         // KX <- KX*S
01164         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01165         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01166         if (hasM_) {
01167           // MX <- MX*S
01168           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01169           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01170         }
01171       }
01172     }
01173 
01174     //----------------------------------------
01175     // compute R
01176     //----------------------------------------
01177     if (newstate.R != Teuchos::null) {
01178       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_),
01179                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01180       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01181                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01182       MVT::SetBlock(*newstate.R,bsind,*R_);
01183     }
01184     else {
01185       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01186       // form R <- KX - MX*T
01187       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01188       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01189       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01190       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01191     }
01192 
01193     // R has been updated; mark the norms as out-of-date
01194     Rnorms_current_ = false;
01195     R2norms_current_ = false;
01196   
01197     // put data in P,KP,MP: P is not used to set theta
01198     if (newstate.P != Teuchos::null) {
01199       TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01200                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01201       TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_),
01202                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01203       hasP_ = true;
01204 
01205       // set P_
01206       MVT::SetBlock(*newstate.P,bsind,*P_);
01207 
01208       // set/compute KP_
01209       if (newstate.KP != Teuchos::null) {
01210         TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01211                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01212         TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_),
01213                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01214         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01215       }
01216       else {
01217         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01218         OPT::Apply(*Op_,*P_,*KP_);
01219         count_ApplyOp_ += blockSize_;
01220       }
01221 
01222       // set/compute MP_
01223       if (hasM_) {
01224         if (newstate.MP != Teuchos::null) {
01225           TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01226                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01227           TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_),
01228                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01229           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01230         }
01231         else {
01232           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01233           OPT::Apply(*MOp_,*P_,*MP_);
01234           count_ApplyM_ += blockSize_;
01235         }
01236       }
01237     }
01238     else {
01239       hasP_ = false;
01240     }
01241 
01242     // finally, we are initialized
01243     initialized_ = true;
01244 
01245     if (om_->isVerbosity( Debug ) ) {
01246       // Check almost everything here
01247       CheckList chk;
01248       chk.checkX = true;
01249       chk.checkKX = true;
01250       chk.checkMX = true;
01251       chk.checkP = true;
01252       chk.checkKP = true;
01253       chk.checkMP = true;
01254       chk.checkR = true;
01255       chk.checkQ = true;
01256       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01257     }
01258 
01259   }
01260 
01261   template <class ScalarType, class MV, class OP>
01262   void LOBPCG<ScalarType,MV,OP>::initialize()
01263   {
01264     LOBPCGState<ScalarType,MV> empty;
01265     initialize(empty);
01266   }
01267 
01268 
01270   // Instruct the solver to use full orthogonalization
01271   template <class ScalarType, class MV, class OP>
01272   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01273   {
01274     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01275       // state is already orthogonalized or solver is not initialized
01276       fullOrtho_ = fullOrtho;
01277     }
01278     else {
01279       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01280       // ergo, we must throw away data in P
01281       fullOrtho_ = true;
01282       hasP_ = false;
01283     }
01284 
01285     // the user has called setFullOrtho, so the class has been instantiated
01286     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01287     // if it is already allocated, it should be the proper size
01288     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01289       // allocated the workspace
01290       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01291     }
01292     else if (fullOrtho_==false) {
01293       // free the workspace
01294       tmpmvec_ = Teuchos::null;
01295     }
01296   }
01297 
01298 
01300   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01301   template <class ScalarType, class MV, class OP>
01302   void LOBPCG<ScalarType,MV,OP>::iterate () 
01303   {
01304     //
01305     // Allocate/initialize data structures
01306     //
01307     if (initialized_ == false) {
01308       initialize();
01309     }
01310 
01311     //
01312     // Miscellaneous definitions
01313     const int oneBlock    =   blockSize_;
01314     const int twoBlocks   = 2*blockSize_;
01315     const int threeBlocks = 3*blockSize_;
01316 
01317     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01318     for (int i=0; i<blockSize_; i++) {
01319       indblock1[i] = i;
01320       indblock2[i] = i +  blockSize_;
01321       indblock3[i] = i + 2*blockSize_;
01322     }
01323 
01324     //
01325     // Define dense projected/local matrices
01326     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01327     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01328     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01329     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01330                                                MM( threeBlocks, threeBlocks ),
01331                                                 S( threeBlocks, threeBlocks );
01332 
01333     while (tester_->checkStatus(this) != Passed) {
01334 
01335       // Print information on current status
01336       if (om_->isVerbosity(Debug)) {
01337         currentStatus( om_->stream(Debug) );
01338       }
01339       else if (om_->isVerbosity(IterationDetails)) {
01340         currentStatus( om_->stream(IterationDetails) );
01341       }
01342 
01343       // increment iteration counter
01344       iter_++;
01345 
01346       // Apply the preconditioner on the residuals: H <- Prec*R
01347       if (Prec_ != Teuchos::null) {
01348         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01349         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01350         count_ApplyPrec_ += blockSize_;
01351       }
01352       else {
01353         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01354       }
01355 
01356       // Apply the mass matrix on H
01357       if (hasM_) {
01358         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01359         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01360         count_ApplyM_ += blockSize_;
01361       }
01362 
01363       // orthogonalize H against the auxiliary vectors
01364       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01365       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01366       Q = auxVecs_;
01367       if (fullOrtho_) {
01368         // X and P are not contiguous, so there is not much point in putting them under 
01369         // a single multivector view
01370         Q.push_back(X_);
01371         if (hasP_) {
01372           Q.push_back(P_);
01373         }
01374       }
01375       {
01376         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01377         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 
01378           Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01379         int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
01380         // our views are currently in place; it is safe to throw an exception
01381         TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01382                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01383       }
01384 
01385       if (om_->isVerbosity( Debug ) ) {
01386         CheckList chk;
01387         chk.checkH = true;
01388         chk.checkMH = true;
01389         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01390       }
01391       else if (om_->isVerbosity( OrthoDetails ) ) {
01392         CheckList chk;
01393         chk.checkH = true;
01394         chk.checkMH = true;
01395         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01396       }
01397 
01398       // Apply the stiffness matrix to H
01399       {
01400         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01401         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01402         count_ApplyOp_ += blockSize_;
01403       }
01404 
01405       if (hasP_) {
01406         nevLocal_ = threeBlocks;
01407       }
01408       else {
01409         nevLocal_ = twoBlocks;
01410       }
01411 
01412       //
01413       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01414       // we need to perform the following operations:
01415       //    X' [KX KH KP]
01416       //    X' [MX MH MP]
01417       //    H' [KH KP]
01418       //    H' [MH MP]
01419       //    P' [KP]
01420       //    P' [MP]
01421       //    [X H P] CX
01422       //    [X H P] CP    if  fullOrtho
01423       //      [H P] CP    if !fullOrtho
01424       //
01425       // since M[X H P] is potentially the same memory as [X H P], and 
01426       // because we are not allowed to have overlapping non-const views of 
01427       // a multivector, we will now abandon our non-const views in favor of 
01428       // const views
01429       //
01430       X_ = Teuchos::null;
01431       KX_ = Teuchos::null;
01432       MX_ = Teuchos::null;
01433       H_ = Teuchos::null;
01434       KH_ = Teuchos::null;
01435       MH_ = Teuchos::null;
01436       P_ = Teuchos::null;
01437       KP_ = Teuchos::null;
01438       MP_ = Teuchos::null;
01439       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01440       {
01441         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01442         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01443 
01444         std::vector<int> indXHP(nevLocal_);
01445         for (int i=0; i<nevLocal_; i++) {
01446           indXHP[i] = i;
01447         }
01448         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01449         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01450         if (hasM_) {
01451           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01452         }
01453         else {
01454           cM_XHP = cXHP;
01455         }
01456 
01457         std::vector<int> indHP(nevLocal_-blockSize_);
01458         for (int i=blockSize_; i<nevLocal_; i++) {
01459           indHP[i-blockSize_] = i;
01460         }
01461         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01462         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01463         if (hasM_) {
01464           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01465         }
01466         else {
01467           cM_HP = cHP;
01468         }
01469 
01470         if (nevLocal_ == threeBlocks) {
01471           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01472           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01473           if (hasM_) {
01474             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01475           }
01476           else {
01477             cM_P = cP;
01478           }
01479         }
01480       }
01481 
01482       //
01483       //----------------------------------------
01484       // Form "local" mass and stiffness matrices
01485       //----------------------------------------
01486       {
01487         // We will form only the block upper triangular part of 
01488         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01489         // Get the necessary views into KK and MM:
01490         //      [--K1--]        [--M1--]
01491         // KK = [  -K2-]   MM = [  -M2-]
01492         //      [    K3]        [    M3]
01493         // 
01494         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01495         //
01496         Teuchos::SerialDenseMatrix<int,ScalarType> 
01497           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01498           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01499           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01500           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01501           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01502           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01503         {
01504           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01505           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01506           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01507           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01508           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01509           if (nevLocal_ == threeBlocks) {
01510             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01511             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01512           }
01513         }
01514       }
01515       // below, we only need bases [X H P] and [H P] and friends
01516       // furthermore, we only need [H P] and friends if fullOrtho == false
01517       // clear the others now
01518       cX   = Teuchos::null;
01519       cH   = Teuchos::null;
01520       cP   = Teuchos::null;
01521       cK_P = Teuchos::null;
01522       cM_P = Teuchos::null;
01523       if (fullOrtho_ == true) {
01524         cHP = Teuchos::null;
01525         cK_HP = Teuchos::null;
01526         cM_HP = Teuchos::null;
01527       }
01528 
01529       //
01530       //---------------------------------------------------
01531       // Perform a spectral decomposition of (KK,MM)
01532       //---------------------------------------------------
01533       //
01534       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01535       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01536                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01537                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01538       {
01539         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01540         int localSize = nevLocal_;
01541         Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
01542         // localSize tells directSolver() how big KK,MM are
01543         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01544         // because of loss of MM-orthogonality in the projected eigenvectors
01545         // nevLocal_ tells us how much it used, telling us the effective localSize 
01546         // (i.e., how much of KK,MM used by directSolver)
01547         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01548         // detected by directSolver
01549         //
01550         if (nevLocal_ != localSize) {
01551           // before throwing the exception, and thereby leaving iterate(), setup the views again
01552           // first, clear the const views
01553           cXHP   = Teuchos::null;
01554           cK_XHP = Teuchos::null;
01555           cM_XHP = Teuchos::null;
01556           cHP    = Teuchos::null;
01557           cK_HP  = Teuchos::null;
01558           cM_HP  = Teuchos::null;
01559           setupViews();
01560         }
01561         TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01562             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01563       }
01564 
01565       //
01566       //---------------------------------------------------
01567       // Sort the ritz values using the sort manager
01568       //---------------------------------------------------
01569       Teuchos::LAPACK<int,ScalarType> lapack;
01570       Teuchos::BLAS<int,ScalarType> blas;
01571       {
01572         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01573 
01574         std::vector<int> order(nevLocal_);
01575         // 
01576         // Sort the first nevLocal_ values in theta_
01577         sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_);   // don't catch exception
01578         //
01579         // Sort the primitive ritz vectors
01580         Utils::permuteVectors(order,lclS);
01581       }
01582 
01583       //
01584       //----------------------------------------
01585       // Compute coefficients for X and P under [X H P]
01586       //----------------------------------------
01587       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01588       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01589       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01590       // Here, we will also orthonormalize CX.
01591       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01592       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01593       if (fullOrtho_) {
01594         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01595         //                             ( S21 )                          ( S21 )
01596         //                             ( S31 )                          ( S31 )
01597         // Do this using Cholesky factorization of ( S11  0  )
01598         //                                         ( S21 S21 )
01599         //                                         ( S31 S31 )
01600         //           ( S11  0  )
01601         // Build C = ( S21 S21 )
01602         //           ( S31 S31 )
01603         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01604                                                 MMC(nevLocal_,twoBlocks),
01605                                                 CMMC(twoBlocks  ,twoBlocks);
01606 
01607         // first block of rows: ( S11 0 )
01608         for (int j=0; j<oneBlock; j++) {
01609           for (int i=0; i<oneBlock; i++) {
01610             // CX
01611             C(i,j) = lclS(i,j);
01612             // CP
01613             C(i,j+oneBlock) = ZERO;
01614           }
01615         }
01616         // second block of rows: (S21 S21)
01617         for (int j=0; j<oneBlock; j++) {
01618           for (int i=oneBlock; i<twoBlocks; i++) {
01619             // CX
01620             C(i,j)          = lclS(i,j);
01621             // CP
01622             C(i,j+oneBlock) = lclS(i,j);
01623           }
01624         }
01625         // third block of rows
01626         if (nevLocal_ == threeBlocks) {
01627           for (int j=0; j<oneBlock; j++) {
01628             for (int i=twoBlocks; i<threeBlocks; i++) {
01629               // CX
01630               C(i,j)          = lclS(i,j);
01631               // CP
01632               C(i,j+oneBlock) = lclS(i,j);
01633             }
01634           }
01635         }
01636 
01637         // compute MMC = lclMM*C
01638         {
01639           int teuchosret;
01640           teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01641           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01642               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01643           // compute CMMC = C^H*MMC == C^H*lclMM*C
01644           teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
01645           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01646               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01647         }
01648 
01649         // compute R (cholesky) of CMMC
01650         {
01651           int info;
01652           lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
01653           // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01654           if (info != 0) {
01655             cXHP = Teuchos::null;
01656             cHP = Teuchos::null;
01657             cK_XHP = Teuchos::null;
01658             cK_HP = Teuchos::null;
01659             cM_XHP = Teuchos::null;
01660             cM_HP = Teuchos::null;
01661             setupViews();
01662           }
01663           TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 
01664               "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.");
01665         }
01666         // compute C = C inv(R)
01667         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01668                   nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
01669         // put C(:,0:oneBlock-1) into CX
01670         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01671         // put C(:,oneBlock:twoBlocks-1) into CP
01672         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01673 
01674         // check the results
01675         if (om_->isVerbosity( Debug ) ) {
01676           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01677                                                      tmp2(oneBlock,oneBlock);
01678           MagnitudeType tmp;
01679           int teuchosret;
01680           std::stringstream os;
01681           os.precision(2);
01682           os.setf(std::ios::scientific, std::ios::floatfield);
01683 
01684           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01685 
01686           // check CX^T MM CX == I
01687           // compute tmp1 = MM*CX
01688           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01689           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01690               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01691           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01692           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01693           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01694               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01695           // subtrace tmp2 - I == CX^H * MM * CX - I
01696           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01697           tmp = tmp2.normFrobenius();          
01698           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01699 
01700           // check CP^T MM CP == I
01701           // compute tmp1 = MM*CP
01702           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01703           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01704               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01705           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01706           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01707           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01708               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01709           // subtrace tmp2 - I == CP^H * MM * CP - I
01710           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01711           tmp = tmp2.normFrobenius();          
01712           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01713 
01714           // check CX^T MM CP == 0
01715           // compute tmp1 = MM*CP
01716           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01717           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01718           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01719           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01720           TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01721           // subtrace tmp2 == CX^H * MM * CP
01722           tmp = tmp2.normFrobenius();          
01723           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01724 
01725           os << std::endl;
01726           om_->print(Debug,os.str());
01727         }
01728       }
01729       else {
01730         //     [S11 ... ...]
01731         // S = [S21 ... ...]
01732         //     [S31 ... ...]
01733         //
01734         // CX = [S11]
01735         //      [S21]
01736         //      [S31]   ->  X = [X H P] CX
01737         //      
01738         // CP = [S21]   ->  P =   [H P] CP
01739         //      [S31]
01740         //
01741         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01742         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01743       }
01744 
01745       //
01746       //----------------------------------------
01747       // Compute new X and new P
01748       //----------------------------------------
01749       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01750       {
01751         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01752 
01753         // if full ortho, then CX and CP are dense
01754         // we multiply [X H P]*CX into tmpMV
01755         //             [X H P]*CP into R
01756         // then put V(:,firstblock) <- tmpMV
01757         //          V(:,thirdblock) <- R
01758         //
01759         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01760         // of V, so that we can modify it before computing P
01761         // so we multiply [X H P]*CX into R
01762         //                V(:,firstblock) <- R
01763         //       multiply [H P]*CP into R
01764         //                V(:,thirdblock) <- R
01765         //
01766         // mutatis mutandis for K[XP] and M[XP]
01767         //
01768         // use SetBlock to do the assignments into V_
01769         //
01770         // in either case, views are only allowed to be overlapping
01771         // if they are const, and it should be assume that SetBlock
01772         // creates a view of the associated part
01773         //
01774         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01775         //
01776         if (fullOrtho_) {
01777           // X,P
01778           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01779           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01780           cXHP = Teuchos::null;
01781           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01782           MVT::SetBlock(*R_      ,indblock3,*V_);
01783           // KX,KP
01784           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01785           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01786           cK_XHP = Teuchos::null;
01787           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01788           MVT::SetBlock(*R_      ,indblock3,*KV_);
01789           // MX,MP
01790           if (hasM_) {
01791             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01792             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01793             cM_XHP = Teuchos::null;
01794             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01795             MVT::SetBlock(*R_      ,indblock3,*MV_);
01796           }
01797           else {
01798             cM_XHP = Teuchos::null;
01799           }
01800         }
01801         else {
01802           // X,P
01803           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01804           cXHP = Teuchos::null;
01805           MVT::SetBlock(*R_,indblock1,*V_);
01806           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01807           cHP = Teuchos::null;
01808           MVT::SetBlock(*R_,indblock3,*V_);
01809           // KX,KP
01810           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01811           cK_XHP = Teuchos::null;
01812           MVT::SetBlock(*R_,indblock1,*KV_);
01813           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01814           cK_HP = Teuchos::null;
01815           MVT::SetBlock(*R_,indblock3,*KV_);
01816           // MX,MP
01817           if (hasM_) {
01818             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01819             cM_XHP = Teuchos::null;
01820             MVT::SetBlock(*R_,indblock1,*MV_);
01821             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01822             cM_HP = Teuchos::null;
01823             MVT::SetBlock(*R_,indblock3,*MV_);
01824           }
01825           else {
01826             cM_XHP = Teuchos::null;
01827             cM_HP = Teuchos::null;
01828           }
01829         }
01830       } // end timing block
01831       // done with coefficient matrices
01832       CX = Teuchos::null;
01833       CP = Teuchos::null;
01834 
01835       //
01836       // we now have a P direction
01837       hasP_ = true;
01838 
01839       // debugging check: all of our const views should have been cleared by now
01840       // if not, we have a logic error above
01841       TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01842                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01843                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01844                           std::logic_error,
01845                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01846 
01847       //
01848       // recreate our const MV views of X,H,P and friends
01849       setupViews();
01850 
01851       //
01852       // Compute the new residuals, explicitly
01853       {
01854         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01855         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01856         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01857         for (int i = 0; i < blockSize_; i++) {
01858           T(i,i) = theta_[i];
01859         }
01860         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01861       }
01862 
01863       // R has been updated; mark the norms as out-of-date
01864       Rnorms_current_ = false;
01865       R2norms_current_ = false;
01866 
01867       // When required, monitor some orthogonalities
01868       if (om_->isVerbosity( Debug ) ) {
01869         // Check almost everything here
01870         CheckList chk;
01871         chk.checkX = true;
01872         chk.checkKX = true;
01873         chk.checkMX = true;
01874         chk.checkP = true;
01875         chk.checkKP = true;
01876         chk.checkMP = true;
01877         chk.checkR = true;
01878         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01879       }
01880       else if (om_->isVerbosity( OrthoDetails )) {
01881         CheckList chk;
01882         chk.checkX = true;
01883         chk.checkP = true;
01884         chk.checkR = true;
01885         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01886       }
01887     } // end while (statusTest == false)
01888   }
01889 
01890 
01892   // compute/return residual M-norms
01893   template <class ScalarType, class MV, class OP>
01894   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01895   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01896     if (Rnorms_current_ == false) {
01897       // Update the residual norms
01898       orthman_->norm(*R_,Rnorms_);
01899       Rnorms_current_ = true;
01900     }
01901     return Rnorms_;
01902   }
01903 
01904   
01906   // compute/return residual 2-norms
01907   template <class ScalarType, class MV, class OP>
01908   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01909   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01910     if (R2norms_current_ == false) {
01911       // Update the residual 2-norms 
01912       MVT::MvNorm(*R_,R2norms_);
01913       R2norms_current_ = true;
01914     }
01915     return R2norms_;
01916   }
01917 
01918 
01919 
01920 
01922   // Check accuracy, orthogonality, and other debugging stuff
01923   // 
01924   // bools specify which tests we want to run (instead of running more than we actually care about)
01925   //
01926   // we don't bother checking the following because they are computed explicitly:
01927   //    H == Prec*R
01928   //   KH == K*H
01929   //
01930   // 
01931   // checkX : X orthonormal
01932   //          orthogonal to auxvecs
01933   // checkMX: check MX == M*X
01934   // checkKX: check KX == K*X
01935   // checkP : if fullortho P orthonormal and orthogonal to X
01936   //          orthogonal to auxvecs
01937   // checkMP: check MP == M*P
01938   // checkKP: check KP == K*P
01939   // checkH : if fullortho H orthonormal and orthogonal to X and P
01940   //          orthogonal to auxvecs
01941   // checkMH: check MH == M*H
01942   // checkR : check R orthogonal to X
01943   // checkQ : check that auxiliary vectors are actually orthonormal
01944   //
01945   // TODO: 
01946   //  add checkTheta 
01947   //
01948   template <class ScalarType, class MV, class OP>
01949   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
01950   {
01951     using std::endl;
01952 
01953     std::stringstream os;
01954     os.precision(2);
01955     os.setf(std::ios::scientific, std::ios::floatfield);
01956     MagnitudeType tmp;
01957 
01958     os << " Debugging checks: iteration " << iter_ << where << endl;
01959 
01960     // X and friends
01961     if (chk.checkX && initialized_) {
01962       tmp = orthman_->orthonormError(*X_);
01963       os << " >> Error in X^H M X == I : " << tmp << endl;
01964       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01965         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
01966         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
01967       }
01968     }
01969     if (chk.checkMX && hasM_ && initialized_) {
01970       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
01971       os << " >> Error in MX == M*X    : " << tmp << endl;
01972     }
01973     if (chk.checkKX && initialized_) {
01974       tmp = Utils::errorEquality(*X_, *KX_, Op_);
01975       os << " >> Error in KX == K*X    : " << tmp << endl;
01976     }
01977 
01978     // P and friends
01979     if (chk.checkP && hasP_ && initialized_) {
01980       if (fullOrtho_) {
01981         tmp = orthman_->orthonormError(*P_);
01982         os << " >> Error in P^H M P == I : " << tmp << endl;
01983         tmp = orthman_->orthogError(*P_,*X_);
01984         os << " >> Error in P^H M X == 0 : " << tmp << endl;
01985       }
01986       for (unsigned int i=0; i<auxVecs_.size(); i++) {
01987         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
01988         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
01989       }
01990     }
01991     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
01992       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
01993       os << " >> Error in MP == M*P    : " << tmp << endl;
01994     }
01995     if (chk.checkKP && hasP_ && initialized_) {
01996       tmp = Utils::errorEquality(*P_, *KP_, Op_);
01997       os << " >> Error in KP == K*P    : " << tmp << endl;
01998     }
01999 
02000     // H and friends
02001     if (chk.checkH && initialized_) {
02002       if (fullOrtho_) {
02003         tmp = orthman_->orthonormError(*H_);
02004         os << " >> Error in H^H M H == I : " << tmp << endl;
02005         tmp = orthman_->orthogError(*H_,*X_);
02006         os << " >> Error in H^H M X == 0 : " << tmp << endl;
02007         if (hasP_) {
02008           tmp = orthman_->orthogError(*H_,*P_);
02009           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02010         }
02011       }
02012       for (unsigned int i=0; i<auxVecs_.size(); i++) {
02013         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02014         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02015       }
02016     }
02017     if (chk.checkMH && hasM_ && initialized_) {
02018       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02019       os << " >> Error in MH == M*H    : " << tmp << endl;
02020     }
02021 
02022     // R: this is not M-orthogonality, but standard euclidean orthogonality
02023     if (chk.checkR && initialized_) {
02024       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02025       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02026       tmp = xTx.normFrobenius();
02027       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02028       double normR = xTx.normFrobenius();
02029       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02030     }
02031 
02032     // Q
02033     if (chk.checkQ) {
02034       for (unsigned int i=0; i<auxVecs_.size(); i++) {
02035         tmp = orthman_->orthonormError(*auxVecs_[i]);
02036         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02037         for (unsigned int j=i+1; j<auxVecs_.size(); j++) {
02038           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02039           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02040         }
02041       }
02042     }
02043 
02044     os << endl;
02045 
02046     return os.str();
02047   }
02048 
02049 
02051   // Print the current status of the solver
02052   template <class ScalarType, class MV, class OP>
02053   void 
02054   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02055   {
02056     using std::endl;
02057 
02058     os.setf(std::ios::scientific, std::ios::floatfield);  
02059     os.precision(6);
02060     os <<endl;
02061     os <<"================================================================================" << endl;
02062     os << endl;
02063     os <<"                              LOBPCG Solver Status" << endl;
02064     os << endl;
02065     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02066     os <<"The number of iterations performed is " << iter_       << endl;
02067     os <<"The current block size is             " << blockSize_  << endl;
02068     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02069     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02070     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02071     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02072 
02073     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02074 
02075     if (initialized_) {
02076       os << endl;
02077       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02078       os << std::setw(20) << "Eigenvalue" 
02079          << std::setw(20) << "Residual(M)"
02080          << std::setw(20) << "Residual(2)"
02081          << endl;
02082       os <<"--------------------------------------------------------------------------------"<<endl;
02083       for (int i=0; i<blockSize_; i++) {
02084         os << std::setw(20) << theta_[i];
02085         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02086         else os << std::setw(20) << "not current";
02087         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02088         else os << std::setw(20) << "not current";
02089         os << endl;
02090       }
02091     }
02092     os <<"================================================================================" << endl;
02093     os << endl;
02094   }
02095 
02097   // are we initialized or not?
02098   template <class ScalarType, class MV, class OP>
02099   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02100     return initialized_; 
02101   }
02102 
02103 
02105   // is P valid or not?
02106   template <class ScalarType, class MV, class OP>
02107   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02108     return hasP_;
02109   }
02110   
02112   // is full orthogonalization enabled or not?
02113   template <class ScalarType, class MV, class OP>
02114   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02115     return(fullOrtho_); 
02116   }
02117 
02118   
02120   // return the current auxilliary vectors
02121   template <class ScalarType, class MV, class OP>
02122   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02123     return auxVecs_;
02124   }
02125 
02127   // return the current block size
02128   template <class ScalarType, class MV, class OP>
02129   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02130     return(blockSize_); 
02131   }
02132 
02134   // return the current eigenproblem
02135   template <class ScalarType, class MV, class OP>
02136   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02137     return(*problem_); 
02138   }
02139 
02140   
02142   // return the max subspace dimension
02143   template <class ScalarType, class MV, class OP>
02144   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02145     return 3*blockSize_;
02146   }
02147 
02149   // return the current subspace dimension
02150   template <class ScalarType, class MV, class OP>
02151   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02152     if (!initialized_) return 0;
02153     return nevLocal_;
02154   }
02155 
02156   
02158   // return the current ritz residual norms
02159   template <class ScalarType, class MV, class OP>
02160   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02161   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02162   {
02163     return this->getRes2Norms();
02164   }
02165 
02166   
02168   // return the current compression indices
02169   template <class ScalarType, class MV, class OP>
02170   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02171     std::vector<int> ret(nevLocal_,0);
02172     return ret;
02173   }
02174 
02175   
02177   // return the current ritz values
02178   template <class ScalarType, class MV, class OP>
02179   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02180     std::vector<Value<ScalarType> > ret(nevLocal_);
02181     for (int i=0; i<nevLocal_; i++) {
02182       ret[i].realpart = theta_[i];
02183       ret[i].imagpart = ZERO;
02184     }
02185     return ret;
02186   }
02187 
02189   // Set a new StatusTest for the solver.
02190   template <class ScalarType, class MV, class OP>
02191   void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
02192     TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
02193         "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
02194     tester_ = test;
02195   }
02196 
02198   // Get the current StatusTest used by the solver.
02199   template <class ScalarType, class MV, class OP>
02200   Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
02201     return tester_;
02202   }
02203   
02205   // return the current ritz vectors
02206   template <class ScalarType, class MV, class OP>
02207   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02208     return X_;
02209   }
02210 
02211   
02213   // reset the iteration counter
02214   template <class ScalarType, class MV, class OP>
02215   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02216     iter_=0; 
02217   }
02218 
02219   
02221   // return the number of iterations
02222   template <class ScalarType, class MV, class OP>
02223   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02224     return(iter_); 
02225   }
02226 
02227   
02229   // return the state
02230   template <class ScalarType, class MV, class OP>
02231   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02232     LOBPCGState<ScalarType,MV> state;
02233     state.V = V_;
02234     state.KV = KV_;
02235     state.X = X_;
02236     state.KX = KX_;
02237     state.P = P_;
02238     state.KP = KP_;
02239     state.H = H_;
02240     state.KH = KH_;
02241     state.R = R_;
02242     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02243     if (hasM_) {
02244       state.MV = MV_;
02245       state.MX = MX_;
02246       state.MP = MP_;
02247       state.MH = MH_;
02248     }
02249     else {
02250       state.MX = Teuchos::null;
02251       state.MP = Teuchos::null;
02252       state.MH = Teuchos::null;
02253     }
02254     return state;
02255   }
02256 
02257 } // end Anasazi namespace
02258 
02259 #endif // ANASAZI_LOBPCG_HPP

Generated on Wed May 12 21:24:34 2010 for Anasazi by  doxygen 1.4.7