Anasazi Version of the Day
AnasaziLOBPCG.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00415     void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
00416 
00418     Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
00419 
00421     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00422 
00423 
00432     void setBlockSize(int blockSize);
00433 
00434 
00436     int getBlockSize() const;
00437 
00438 
00450     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00451 
00453     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00454 
00456 
00458 
00459 
00465     void setFullOrtho(bool fullOrtho);
00466 
00468     bool getFullOrtho() const;
00469     
00471     bool hasP();
00472 
00474     
00476 
00477 
00479     void currentStatus(std::ostream &os);
00480 
00482 
00483   private:
00484     //
00485     //
00486     //
00487     void setupViews();
00488     //
00489     // Convenience typedefs
00490     //
00491     typedef SolverUtils<ScalarType,MV,OP> Utils;
00492     typedef MultiVecTraits<ScalarType,MV> MVT;
00493     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00494     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00495     typedef typename SCT::magnitudeType MagnitudeType;
00496     const MagnitudeType ONE;  
00497     const MagnitudeType ZERO; 
00498     const MagnitudeType NANVAL;
00499     //
00500     // Internal structs
00501     //
00502     struct CheckList {
00503       bool checkX, checkMX, checkKX;
00504       bool checkH, checkMH;
00505       bool checkP, checkMP, checkKP;
00506       bool checkR, checkQ;
00507       CheckList() : checkX(false),checkMX(false),checkKX(false),
00508                     checkH(false),checkMH(false),
00509                     checkP(false),checkMP(false),checkKP(false),
00510                     checkR(false),checkQ(false) {};
00511     };
00512     //
00513     // Internal methods
00514     //
00515     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00516     //
00517     // Classes inputed through constructor that define the eigenproblem to be solved.
00518     //
00519     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00520     const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
00521     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00522     Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00523     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00524     //
00525     // Information obtained from the eigenproblem
00526     //
00527     Teuchos::RCP<const OP> Op_;
00528     Teuchos::RCP<const OP> MOp_;
00529     Teuchos::RCP<const OP> Prec_;
00530     bool hasM_;
00531     //
00532     // Internal timers
00533     //
00534     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00535                                 timerSort_, 
00536                                 timerLocalProj_, timerDS_,
00537                                 timerLocalUpdate_, timerCompRes_,
00538                                 timerOrtho_, timerInit_;
00539     //
00540     // Counters
00541     //
00542     // Number of operator applications
00543     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00544 
00545     //
00546     // Algorithmic parameters.
00547     //
00548     // blockSize_ is the solver block size
00549     int blockSize_;
00550     //
00551     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00552     // be activated (see citations at the top of this file)
00553     bool fullOrtho_;
00554 
00555     //
00556     // Current solver state
00557     //
00558     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00559     // is capable of running; _initialize is controlled  by the initialize() member method
00560     // For the implications of the state of initialized_, please see documentation for initialize()
00561     bool initialized_;
00562     //
00563     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00564     // this tells us how many of the values in theta_ are valid Ritz values
00565     int nevLocal_;
00566     //
00567     // hasP_ tells us whether there is valid data in P (and KP,MP)
00568     bool hasP_;
00569     //
00570     // State Multivecs
00571     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00572     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00573     // the rest are multivector views into V_, KV_ and MV_
00574     Teuchos::RCP<MV> X_, KX_, MX_,
00575                      H_, KH_, MH_,
00576                      P_, KP_, MP_;
00577 
00578     //
00579     // if fullOrtho_ == true, then we must produce the following on every iteration:
00580     // [newX newP] = [X H P] [CX;CP]
00581     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00582     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00583     // we must allocate additional storage for this.
00584     // otherwise, when not using full orthogonalization, the structure
00585     // [newX newP] = [X H P] [CX1  0 ]
00586     //                       [CX2 CP2]  allows us to work using only R as work space
00587     //                       [CX3 CP3] 
00588     Teuchos::RCP<MV> tmpmvec_;        
00589     // 
00590     // auxiliary vectors
00591     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00592     int numAuxVecs_;
00593     //
00594     // Number of iterations that have been performed.
00595     int iter_;
00596     // 
00597     // Current eigenvalues, residual norms
00598     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00599     // 
00600     // are the residual norms current with the residual?
00601     bool Rnorms_current_, R2norms_current_;
00602 
00603   };
00604 
00605 
00606 
00607 
00609   // Constructor
00610   template <class ScalarType, class MV, class OP>
00611   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00612         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00613         const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00614         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00615         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00616         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00617         Teuchos::ParameterList &params
00618         ) :
00619     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00620     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00621     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00622     // problem, tools
00623     problem_(problem), 
00624     sm_(sorter),
00625     om_(printer),
00626     tester_(tester),
00627     orthman_(ortho),
00628     // timers, counters
00629 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00630     timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")),
00631     timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")),
00632     timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")),
00633     timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")),
00634     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")),
00635     timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")),
00636     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")),
00637     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")),
00638     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")),
00639     timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")),
00640 #endif
00641     count_ApplyOp_(0),
00642     count_ApplyM_(0),
00643     count_ApplyPrec_(0),
00644     // internal data
00645     blockSize_(0),
00646     fullOrtho_(params.get("Full Ortho", true)),
00647     initialized_(false),
00648     nevLocal_(0),
00649     hasP_(false),
00650     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00651     numAuxVecs_(0),
00652     iter_(0),
00653     Rnorms_current_(false),
00654     R2norms_current_(false)
00655   {     
00656     TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00657                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00658     TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00659                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00660     TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00661                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00662     TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00663                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00664     TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00665                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00666     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: problem is not set.");
00668     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00669                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00670 
00671     // get the problem operators
00672     Op_   = problem_->getOperator();
00673     TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00674                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00675     MOp_  = problem_->getM();
00676     Prec_ = problem_->getPrec();
00677     hasM_ = (MOp_ != Teuchos::null);
00678 
00679     // set the block size and allocate data
00680     int bs = params.get("Block Size", problem_->getNEV());
00681     setBlockSize(bs);
00682   }
00683 
00684 
00686   // Set the block size and make necessary adjustments.
00687   template <class ScalarType, class MV, class OP>
00688   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00689   {
00690     // time spent here counts towards timerInit_
00691 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00692     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00693 #endif
00694 
00695     // This routine only allocates space; it doesn't not perform any computation
00696     // if size is decreased, take the first newBS vectors of all and leave state as is
00697     // otherwise, grow/allocate space and set solver to unitialized
00698 
00699     Teuchos::RCP<const MV> tmp;
00700     // grab some Multivector to Clone
00701     // in practice, getInitVec() should always provide this, but it is possible to use a 
00702     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00703     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00704     // than V_, and we don't want to keep V_ around longer than necessary
00705     if (blockSize_ > 0) {
00706       tmp = R_;
00707     }
00708     else {
00709       tmp = problem_->getInitVec();
00710       TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00711                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00712     }
00713     
00714     TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00715     if (newBS == blockSize_) {
00716       // do nothing
00717       return;
00718     }
00719     else if (newBS < blockSize_ && initialized_) {
00720       //
00721       // shrink vectors
00722 
00723       // release views so we can modify the bases
00724       X_ = Teuchos::null;
00725       KX_ = Teuchos::null;
00726       MX_ = Teuchos::null;
00727       H_ = Teuchos::null;
00728       KH_ = Teuchos::null;
00729       MH_ = Teuchos::null;
00730       P_ = Teuchos::null;
00731       KP_ = Teuchos::null;
00732       MP_ = Teuchos::null;
00733 
00734       // make new indices vectors
00735       std::vector<int> newind(newBS), oldind(newBS);
00736       for (int i=0; i<newBS; i++) {
00737         newind[i] = i;
00738         oldind[i] = i;
00739       }
00740 
00741       Teuchos::RCP<MV> newV, newMV, newKV, newR;
00742       Teuchos::RCP<const MV> src;
00743       // allocate R and newV
00744       newR = MVT::Clone(*tmp,newBS);
00745       newV = MVT::Clone(*tmp,newBS*3);
00746       newKV = MVT::Clone(*tmp,newBS*3);
00747       if (hasM_) {
00748         newMV = MVT::Clone(*tmp,newBS*3);
00749       }
00750 
00751       //
00752       // if we are initialized, we want to pull the data from V_ into newV:
00753       //           bs  |  bs  | bs 
00754       // newV  = [newX | **** |newP ]
00755       // newKV = [newKX| **** |newKP]
00756       // newMV = [newMX| **** |newMP]
00757       // where 
00758       //          oldbs   |  oldbs  |   oldbs   
00759       //  V_ = [newX  *** | ******* | newP  ***]
00760       // KV_ = [newKX *** | ******* | newKP ***]
00761       // MV_ = [newMX *** | ******* | newMP ***]
00762       //
00763       // we don't care to copy the data corresponding to H
00764       // we will not copy the M data if !hasM_, because it doesn't exist
00765       //
00766 
00767       // these are shrink operations which preserve their data
00768       theta_.resize(3*newBS);
00769       Rnorms_.resize(newBS);
00770       R2norms_.resize(newBS);
00771 
00772       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00773       src = MVT::CloneView(*R_,newind);
00774       MVT::SetBlock(*src,newind,*newR);
00775       // free old memory and point to new memory
00776       R_ = newR;
00777 
00778       // copy in order: newX newKX newMX, then newP newKP newMP
00779       // for  X: [0,bs-1] <-- [0,bs-1] 
00780       src = MVT::CloneView(*V_,oldind);
00781       MVT::SetBlock(*src,newind,*newV);
00782       src = MVT::CloneView(*KV_,oldind);
00783       MVT::SetBlock(*src,newind,*newKV);
00784       if (hasM_) {
00785         src = MVT::CloneView(*MV_,oldind);
00786         MVT::SetBlock(*src,newind,*newMV);
00787       }
00788       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00789       for (int i=0; i<newBS; i++) {
00790         newind[i] += 2*newBS;
00791         oldind[i] += 2*blockSize_;
00792       }
00793       src = MVT::CloneView(*V_,oldind);
00794       MVT::SetBlock(*src,newind,*newV);
00795       src = MVT::CloneView(*KV_,oldind);
00796       MVT::SetBlock(*src,newind,*newKV);
00797       if (hasM_) {
00798         src = MVT::CloneView(*MV_,oldind);
00799         MVT::SetBlock(*src,newind,*newMV);
00800       }
00801 
00802       // release temp view
00803       src = Teuchos::null;
00804 
00805       // release old allocations and point at new memory
00806       V_ = newV;
00807       KV_ = newKV;
00808       if (hasM_) {
00809         MV_ = newMV;
00810       }
00811       else {
00812         MV_ = V_;
00813       }
00814     }
00815     else {  
00816       // newBS > blockSize_  or  not initialized
00817       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00818       initialized_ = false;
00819       hasP_ = false;
00820 
00821       // release views
00822       X_ = Teuchos::null;
00823       KX_ = Teuchos::null;
00824       MX_ = Teuchos::null;
00825       H_ = Teuchos::null;
00826       KH_ = Teuchos::null;
00827       MH_ = Teuchos::null;
00828       P_ = Teuchos::null;
00829       KP_ = Teuchos::null;
00830       MP_ = Teuchos::null;
00831 
00832       // free allocated storage
00833       R_ = Teuchos::null;
00834       V_ = Teuchos::null;
00835 
00836       // allocate scalar vectors
00837       theta_.resize(3*newBS,NANVAL);
00838       Rnorms_.resize(newBS,NANVAL);
00839       R2norms_.resize(newBS,NANVAL);
00840       
00841       // clone multivectors off of tmp
00842       R_ = MVT::Clone(*tmp,newBS);
00843       V_ = MVT::Clone(*tmp,newBS*3);
00844       KV_ = MVT::Clone(*tmp,newBS*3);
00845       if (hasM_) {
00846         MV_ = MVT::Clone(*tmp,newBS*3);
00847       }
00848       else {
00849         MV_ = V_;
00850       }
00851     }
00852 
00853     // allocate tmp space
00854     tmpmvec_ = Teuchos::null;
00855     if (fullOrtho_) {
00856       tmpmvec_ = MVT::Clone(*tmp,newBS);
00857     }
00858 
00859     // set new block size
00860     blockSize_ = newBS;
00861 
00862     // setup new views
00863     setupViews();
00864   }
00865 
00866 
00868   // Setup views into V,KV,MV
00869   template <class ScalarType, class MV, class OP>
00870   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00871   {
00872     std::vector<int> ind(blockSize_);
00873 
00874     for (int i=0; i<blockSize_; i++) {
00875       ind[i] = i;
00876     }
00877     X_  = MVT::CloneViewNonConst(*V_,ind);
00878     KX_ = MVT::CloneViewNonConst(*KV_,ind);
00879     if (hasM_) {
00880       MX_ = MVT::CloneViewNonConst(*MV_,ind);
00881     }
00882     else {
00883       MX_ = X_;
00884     }
00885 
00886     for (int i=0; i<blockSize_; i++) {
00887       ind[i] += blockSize_;
00888     }
00889     H_  = MVT::CloneViewNonConst(*V_,ind);
00890     KH_ = MVT::CloneViewNonConst(*KV_,ind);
00891     if (hasM_) {
00892       MH_ = MVT::CloneViewNonConst(*MV_,ind);
00893     }
00894     else {
00895       MH_ = H_;
00896     }
00897 
00898     for (int i=0; i<blockSize_; i++) {
00899       ind[i] += blockSize_;
00900     }
00901     P_  = MVT::CloneViewNonConst(*V_,ind);
00902     KP_ = MVT::CloneViewNonConst(*KV_,ind);
00903     if (hasM_) {
00904       MP_ = MVT::CloneViewNonConst(*MV_,ind);
00905     }
00906     else {
00907       MP_ = P_;
00908     }
00909   }
00910 
00911 
00913   // Set the auxiliary vectors
00914   template <class ScalarType, class MV, class OP>
00915   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00916     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00917 
00918     // set new auxiliary vectors
00919     auxVecs_ = auxvecs;
00920     
00921     numAuxVecs_ = 0;
00922     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00923       numAuxVecs_ += MVT::GetNumberVecs(**i);
00924     }
00925     
00926     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00927     if (numAuxVecs_ > 0 && initialized_) {
00928       initialized_ = false;
00929       hasP_ = false;
00930     }
00931 
00932     if (om_->isVerbosity( Debug ) ) {
00933       // Check almost everything here
00934       CheckList chk;
00935       chk.checkQ = true;
00936       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00937     }
00938   }
00939 
00940 
00942   /* Initialize the state of the solver
00943    * 
00944    * POST-CONDITIONS:
00945    *
00946    * initialized_ == true
00947    * X is orthonormal, orthogonal to auxVecs_
00948    * KX = Op*X
00949    * MX = M*X if hasM_
00950    * theta_ contains Ritz values of X
00951    * R = KX - MX*diag(theta_)
00952    * if hasP() == true,
00953    *   P orthogonal to auxVecs_
00954    *   if fullOrtho_ == true,
00955    *     P orthonormal and orthogonal to X
00956    *   KP = Op*P
00957    *   MP = M*P
00958    */
00959   template <class ScalarType, class MV, class OP>
00960   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00961   {
00962     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00963     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00964 
00965 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00966     Teuchos::TimeMonitor inittimer( *timerInit_ );
00967 #endif
00968 
00969     std::vector<int> bsind(blockSize_);
00970     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00971 
00972     // in LOBPCG, X (the subspace iterate) is primary
00973     // the order of dependence follows like so.
00974     // --init->                 X
00975     //    --op apply->          MX,KX
00976     //       --ritz analysis->  theta
00977     //          --optional->    P,MP,KP
00978     // 
00979     // if the user specifies all data for a level, we will accept it.
00980     // otherwise, we will generate the whole level, and all subsequent levels.
00981     //
00982     // the data members are ordered based on dependence, and the levels are
00983     // partitioned according to the amount of work required to produce the
00984     // items in a level.
00985     //
00986     // inconsitent multivectors widths and lengths will not be tolerated, and
00987     // will be treated with exceptions.
00988 
00989     // set up X, KX, MX: get them from "state" if user specified them
00990 
00991     //----------------------------------------
00992     // set up X, MX, KX
00993     //----------------------------------------
00994     if (newstate.X != Teuchos::null) {
00995       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_),
00996                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00997       // newstate.X must have blockSize_ vectors; any more will be ignored
00998       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
00999                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
01000 
01001       // put X data in X_
01002       MVT::SetBlock(*newstate.X,bsind,*X_);
01003 
01004       // put MX data in MX_
01005       if (hasM_) {
01006         if (newstate.MX != Teuchos::null) {
01007           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_),
01008                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
01009           // newstate.MX must have blockSize_ vectors; any more will be ignored
01010           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
01011                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01012           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01013         }
01014         else {
01015           // user didn't specify MX, compute it
01016           {
01017 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01018             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01019 #endif
01020             OPT::Apply(*MOp_,*X_,*MX_);
01021             count_ApplyM_ += blockSize_;
01022           }
01023           // we generated MX; we will generate R as well
01024           newstate.R = Teuchos::null;
01025         }
01026       }
01027   
01028       // put data in KX
01029       if (newstate.KX != Teuchos::null) {
01030         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_),
01031                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01032         // newstate.KX must have blockSize_ vectors; any more will be ignored
01033         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01034                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01035         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01036       }
01037       else {
01038         // user didn't specify KX, compute it
01039         {
01040 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01041           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01042 #endif
01043           OPT::Apply(*Op_,*X_,*KX_);
01044           count_ApplyOp_ += blockSize_;
01045         }
01046         // we generated KX; we will generate R as well
01047         newstate.R = Teuchos::null;
01048       }
01049     }
01050     else {
01051       // user did not specify X
01052       // we will initialize X, compute KX and MX, and compute R
01053       //
01054       // clear state so we won't use any data from it below
01055       newstate.P  = Teuchos::null;
01056       newstate.KP = Teuchos::null;
01057       newstate.MP = Teuchos::null;
01058       newstate.R  = Teuchos::null;
01059       newstate.T  = Teuchos::null;
01060 
01061       // generate a basis and projectAndNormalize
01062       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01063       TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01064                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01065 
01066       int initSize = MVT::GetNumberVecs(*ivec);
01067       if (initSize > blockSize_) {
01068         // we need only the first blockSize_ vectors from ivec; get a view of them
01069         initSize = blockSize_;
01070         std::vector<int> ind(blockSize_);
01071         for (int i=0; i<blockSize_; i++) ind[i] = i;
01072         ivec = MVT::CloneView(*ivec,ind);
01073       }
01074 
01075       // assign ivec to first part of X_ 
01076       if (initSize > 0) {
01077         std::vector<int> ind(initSize);
01078         for (int i=0; i<initSize; i++) ind[i] = i;
01079         MVT::SetBlock(*ivec,ind,*X_);
01080       }
01081       // fill the rest of X_ with random
01082       if (blockSize_ > initSize) {
01083         std::vector<int> ind(blockSize_ - initSize);
01084         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01085         Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
01086         MVT::MvRandom(*rX);
01087         rX = Teuchos::null;
01088       }
01089 
01090       // put data in MX
01091       if (hasM_) {
01092 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01093         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01094 #endif
01095         OPT::Apply(*MOp_,*X_,*MX_);
01096         count_ApplyM_ += blockSize_;
01097       }
01098   
01099       // remove auxVecs from X_ and normalize it
01100       if (numAuxVecs_ > 0) {
01101 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01102         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01103 #endif
01104         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01105         int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
01106         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01107                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01108       }
01109       else {
01110 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01111         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01112 #endif
01113         int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
01114         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01115                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01116       }
01117 
01118       // put data in KX
01119       {
01120 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01121         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01122 #endif
01123         OPT::Apply(*Op_,*X_,*KX_);
01124         count_ApplyOp_ += blockSize_;
01125       }
01126     } // end if (newstate.X != Teuchos::null)
01127 
01128 
01129     //----------------------------------------
01130     // set up Ritz values
01131     //----------------------------------------
01132     theta_.resize(3*blockSize_,NANVAL);
01133     if (newstate.T != Teuchos::null) {
01134       TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01135                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01136       for (int i=0; i<blockSize_; i++) {
01137         theta_[i] = (*newstate.T)[i];
01138       }
01139       nevLocal_ = blockSize_;
01140     }
01141     else {
01142       // get ritz vecs/vals
01143       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01144                                                  MM(blockSize_,blockSize_),
01145                                                   S(blockSize_,blockSize_);
01146       {
01147 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01148         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01149 #endif
01150         // project K
01151         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01152         // project M
01153         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01154         nevLocal_ = blockSize_;
01155       }
01156 
01157       // solve the projected problem
01158       {
01159 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01160         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01161 #endif
01162         Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
01163         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01164                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01165       }
01166 
01167       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01168       // However, we still require them to be ordered correctly
01169       {
01170 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01171         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01172 #endif
01173 
01174         std::vector<int> order(blockSize_);
01175         // 
01176         // sort the first blockSize_ values in theta_
01177         sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_);   // don't catch exception
01178         //
01179         // apply the same ordering to the primitive ritz vectors
01180         Utils::permuteVectors(order,S);
01181       }
01182 
01183       // update the solution, use R for storage
01184       {
01185 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01186         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01187 #endif
01188         // X <- X*S
01189         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01190         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01191         // KX <- KX*S
01192         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01193         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01194         if (hasM_) {
01195           // MX <- MX*S
01196           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01197           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01198         }
01199       }
01200     }
01201 
01202     //----------------------------------------
01203     // compute R
01204     //----------------------------------------
01205     if (newstate.R != Teuchos::null) {
01206       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_),
01207                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01208       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01209                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01210       MVT::SetBlock(*newstate.R,bsind,*R_);
01211     }
01212     else {
01213 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01214       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01215 #endif
01216       // form R <- KX - MX*T
01217       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01218       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01219       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01220       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01221     }
01222 
01223     // R has been updated; mark the norms as out-of-date
01224     Rnorms_current_ = false;
01225     R2norms_current_ = false;
01226   
01227     // put data in P,KP,MP: P is not used to set theta
01228     if (newstate.P != Teuchos::null) {
01229       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01230                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01231       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_),
01232                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01233       hasP_ = true;
01234 
01235       // set P_
01236       MVT::SetBlock(*newstate.P,bsind,*P_);
01237 
01238       // set/compute KP_
01239       if (newstate.KP != Teuchos::null) {
01240         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01241                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01242         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_),
01243                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01244         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01245       }
01246       else {
01247 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01248         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01249 #endif
01250         OPT::Apply(*Op_,*P_,*KP_);
01251         count_ApplyOp_ += blockSize_;
01252       }
01253 
01254       // set/compute MP_
01255       if (hasM_) {
01256         if (newstate.MP != Teuchos::null) {
01257           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01258                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01259           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_),
01260                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01261           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01262         }
01263         else {
01264 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01265           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01266 #endif
01267           OPT::Apply(*MOp_,*P_,*MP_);
01268           count_ApplyM_ += blockSize_;
01269         }
01270       }
01271     }
01272     else {
01273       hasP_ = false;
01274     }
01275 
01276     // finally, we are initialized
01277     initialized_ = true;
01278 
01279     if (om_->isVerbosity( Debug ) ) {
01280       // Check almost everything here
01281       CheckList chk;
01282       chk.checkX = true;
01283       chk.checkKX = true;
01284       chk.checkMX = true;
01285       chk.checkP = true;
01286       chk.checkKP = true;
01287       chk.checkMP = true;
01288       chk.checkR = true;
01289       chk.checkQ = true;
01290       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01291     }
01292 
01293   }
01294 
01295   template <class ScalarType, class MV, class OP>
01296   void LOBPCG<ScalarType,MV,OP>::initialize()
01297   {
01298     LOBPCGState<ScalarType,MV> empty;
01299     initialize(empty);
01300   }
01301 
01302 
01304   // Instruct the solver to use full orthogonalization
01305   template <class ScalarType, class MV, class OP>
01306   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01307   {
01308     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01309       // state is already orthogonalized or solver is not initialized
01310       fullOrtho_ = fullOrtho;
01311     }
01312     else {
01313       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01314       // ergo, we must throw away data in P
01315       fullOrtho_ = true;
01316       hasP_ = false;
01317     }
01318 
01319     // the user has called setFullOrtho, so the class has been instantiated
01320     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01321     // if it is already allocated, it should be the proper size
01322     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01323       // allocated the workspace
01324       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01325     }
01326     else if (fullOrtho_==false) {
01327       // free the workspace
01328       tmpmvec_ = Teuchos::null;
01329     }
01330   }
01331 
01332 
01334   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01335   template <class ScalarType, class MV, class OP>
01336   void LOBPCG<ScalarType,MV,OP>::iterate () 
01337   {
01338     //
01339     // Allocate/initialize data structures
01340     //
01341     if (initialized_ == false) {
01342       initialize();
01343     }
01344 
01345     //
01346     // Miscellaneous definitions
01347     const int oneBlock    =   blockSize_;
01348     const int twoBlocks   = 2*blockSize_;
01349     const int threeBlocks = 3*blockSize_;
01350 
01351     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01352     for (int i=0; i<blockSize_; i++) {
01353       indblock1[i] = i;
01354       indblock2[i] = i +  blockSize_;
01355       indblock3[i] = i + 2*blockSize_;
01356     }
01357 
01358     //
01359     // Define dense projected/local matrices
01360     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01361     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01362     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01363     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01364                                                MM( threeBlocks, threeBlocks ),
01365                                                 S( threeBlocks, threeBlocks );
01366 
01367     while (tester_->checkStatus(this) != Passed) {
01368 
01369       // Print information on current status
01370       if (om_->isVerbosity(Debug)) {
01371         currentStatus( om_->stream(Debug) );
01372       }
01373       else if (om_->isVerbosity(IterationDetails)) {
01374         currentStatus( om_->stream(IterationDetails) );
01375       }
01376 
01377       // increment iteration counter
01378       iter_++;
01379 
01380       // Apply the preconditioner on the residuals: H <- Prec*R
01381       if (Prec_ != Teuchos::null) {
01382 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01383         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01384 #endif
01385         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01386         count_ApplyPrec_ += blockSize_;
01387       }
01388       else {
01389         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01390       }
01391 
01392       // Apply the mass matrix on H
01393       if (hasM_) {
01394 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01395         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01396 #endif
01397         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01398         count_ApplyM_ += blockSize_;
01399       }
01400 
01401       // orthogonalize H against the auxiliary vectors
01402       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01403       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01404       Q = auxVecs_;
01405       if (fullOrtho_) {
01406         // X and P are not contiguous, so there is not much point in putting them under 
01407         // a single multivector view
01408         Q.push_back(X_);
01409         if (hasP_) {
01410           Q.push_back(P_);
01411         }
01412       }
01413       {
01414 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01415         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01416 #endif
01417         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 
01418           Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01419         int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
01420         // our views are currently in place; it is safe to throw an exception
01421         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01422                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01423       }
01424 
01425       if (om_->isVerbosity( Debug ) ) {
01426         CheckList chk;
01427         chk.checkH = true;
01428         chk.checkMH = true;
01429         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01430       }
01431       else if (om_->isVerbosity( OrthoDetails ) ) {
01432         CheckList chk;
01433         chk.checkH = true;
01434         chk.checkMH = true;
01435         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01436       }
01437 
01438       // Apply the stiffness matrix to H
01439       {
01440 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01441         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01442 #endif
01443         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01444         count_ApplyOp_ += blockSize_;
01445       }
01446 
01447       if (hasP_) {
01448         nevLocal_ = threeBlocks;
01449       }
01450       else {
01451         nevLocal_ = twoBlocks;
01452       }
01453 
01454       //
01455       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01456       // we need to perform the following operations:
01457       //    X' [KX KH KP]
01458       //    X' [MX MH MP]
01459       //    H' [KH KP]
01460       //    H' [MH MP]
01461       //    P' [KP]
01462       //    P' [MP]
01463       //    [X H P] CX
01464       //    [X H P] CP    if  fullOrtho
01465       //      [H P] CP    if !fullOrtho
01466       //
01467       // since M[X H P] is potentially the same memory as [X H P], and 
01468       // because we are not allowed to have overlapping non-const views of 
01469       // a multivector, we will now abandon our non-const views in favor of 
01470       // const views
01471       //
01472       X_ = Teuchos::null;
01473       KX_ = Teuchos::null;
01474       MX_ = Teuchos::null;
01475       H_ = Teuchos::null;
01476       KH_ = Teuchos::null;
01477       MH_ = Teuchos::null;
01478       P_ = Teuchos::null;
01479       KP_ = Teuchos::null;
01480       MP_ = Teuchos::null;
01481       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01482       {
01483         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01484         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01485 
01486         std::vector<int> indXHP(nevLocal_);
01487         for (int i=0; i<nevLocal_; i++) {
01488           indXHP[i] = i;
01489         }
01490         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01491         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01492         if (hasM_) {
01493           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01494         }
01495         else {
01496           cM_XHP = cXHP;
01497         }
01498 
01499         std::vector<int> indHP(nevLocal_-blockSize_);
01500         for (int i=blockSize_; i<nevLocal_; i++) {
01501           indHP[i-blockSize_] = i;
01502         }
01503         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01504         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01505         if (hasM_) {
01506           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01507         }
01508         else {
01509           cM_HP = cHP;
01510         }
01511 
01512         if (nevLocal_ == threeBlocks) {
01513           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01514           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01515           if (hasM_) {
01516             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01517           }
01518           else {
01519             cM_P = cP;
01520           }
01521         }
01522       }
01523 
01524       //
01525       //----------------------------------------
01526       // Form "local" mass and stiffness matrices
01527       //----------------------------------------
01528       {
01529         // We will form only the block upper triangular part of 
01530         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01531         // Get the necessary views into KK and MM:
01532         //      [--K1--]        [--M1--]
01533         // KK = [  -K2-]   MM = [  -M2-]
01534         //      [    K3]        [    M3]
01535         // 
01536         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01537         //
01538         Teuchos::SerialDenseMatrix<int,ScalarType> 
01539           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01540           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01541           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01542           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01543           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01544           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01545         {
01546 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01547           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01548 #endif
01549           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01550           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01551           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01552           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01553           if (nevLocal_ == threeBlocks) {
01554             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01555             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01556           }
01557         }
01558       }
01559       // below, we only need bases [X H P] and [H P] and friends
01560       // furthermore, we only need [H P] and friends if fullOrtho == false
01561       // clear the others now
01562       cX   = Teuchos::null;
01563       cH   = Teuchos::null;
01564       cP   = Teuchos::null;
01565       cK_P = Teuchos::null;
01566       cM_P = Teuchos::null;
01567       if (fullOrtho_ == true) {
01568         cHP = Teuchos::null;
01569         cK_HP = Teuchos::null;
01570         cM_HP = Teuchos::null;
01571       }
01572 
01573       //
01574       //---------------------------------------------------
01575       // Perform a spectral decomposition of (KK,MM)
01576       //---------------------------------------------------
01577       //
01578       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01579       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01580                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01581                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01582       {
01583 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01584         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01585 #endif
01586         int localSize = nevLocal_;
01587         Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
01588         // localSize tells directSolver() how big KK,MM are
01589         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01590         // because of loss of MM-orthogonality in the projected eigenvectors
01591         // nevLocal_ tells us how much it used, telling us the effective localSize 
01592         // (i.e., how much of KK,MM used by directSolver)
01593         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01594         // detected by directSolver
01595         //
01596         if (nevLocal_ != localSize) {
01597           // before throwing the exception, and thereby leaving iterate(), setup the views again
01598           // first, clear the const views
01599           cXHP   = Teuchos::null;
01600           cK_XHP = Teuchos::null;
01601           cM_XHP = Teuchos::null;
01602           cHP    = Teuchos::null;
01603           cK_HP  = Teuchos::null;
01604           cM_HP  = Teuchos::null;
01605           setupViews();
01606         }
01607         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01608             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01609       }
01610 
01611       //
01612       //---------------------------------------------------
01613       // Sort the ritz values using the sort manager
01614       //---------------------------------------------------
01615       Teuchos::LAPACK<int,ScalarType> lapack;
01616       Teuchos::BLAS<int,ScalarType> blas;
01617       {
01618 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01619         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01620 #endif
01621 
01622         std::vector<int> order(nevLocal_);
01623         // 
01624         // Sort the first nevLocal_ values in theta_
01625         sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_);   // don't catch exception
01626         //
01627         // Sort the primitive ritz vectors
01628         Utils::permuteVectors(order,lclS);
01629       }
01630 
01631       //
01632       //----------------------------------------
01633       // Compute coefficients for X and P under [X H P]
01634       //----------------------------------------
01635       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01636       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01637       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01638       // Here, we will also orthonormalize CX.
01639       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01640       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01641       if (fullOrtho_) {
01642         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01643         //                             ( S21 )                          ( S21 )
01644         //                             ( S31 )                          ( S31 )
01645         // Do this using Cholesky factorization of ( S11  0  )
01646         //                                         ( S21 S21 )
01647         //                                         ( S31 S31 )
01648         //           ( S11  0  )
01649         // Build C = ( S21 S21 )
01650         //           ( S31 S31 )
01651         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01652                                                 MMC(nevLocal_,twoBlocks),
01653                                                 CMMC(twoBlocks  ,twoBlocks);
01654 
01655         // first block of rows: ( S11 0 )
01656         for (int j=0; j<oneBlock; j++) {
01657           for (int i=0; i<oneBlock; i++) {
01658             // CX
01659             C(i,j) = lclS(i,j);
01660             // CP
01661             C(i,j+oneBlock) = ZERO;
01662           }
01663         }
01664         // second block of rows: (S21 S21)
01665         for (int j=0; j<oneBlock; j++) {
01666           for (int i=oneBlock; i<twoBlocks; i++) {
01667             // CX
01668             C(i,j)          = lclS(i,j);
01669             // CP
01670             C(i,j+oneBlock) = lclS(i,j);
01671           }
01672         }
01673         // third block of rows
01674         if (nevLocal_ == threeBlocks) {
01675           for (int j=0; j<oneBlock; j++) {
01676             for (int i=twoBlocks; i<threeBlocks; i++) {
01677               // CX
01678               C(i,j)          = lclS(i,j);
01679               // CP
01680               C(i,j+oneBlock) = lclS(i,j);
01681             }
01682           }
01683         }
01684 
01685         // compute MMC = lclMM*C
01686         {
01687           int teuchosret;
01688           teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01689           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01690               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01691           // compute CMMC = C^H*MMC == C^H*lclMM*C
01692           teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
01693           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01694               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01695         }
01696 
01697         // compute R (cholesky) of CMMC
01698         {
01699           int info;
01700           lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
01701           // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01702           if (info != 0) {
01703             // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem.
01704             Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_);
01705             Teuchos::SerialDenseMatrix<int,ScalarType> K22_trans( K22, Teuchos::CONJ_TRANS );
01706             K22_trans -= K22;
01707             MagnitudeType symNorm = K22_trans.normOne();
01708             MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps()));
01709 
01710             cXHP = Teuchos::null;
01711             cHP = Teuchos::null;
01712             cK_XHP = Teuchos::null;
01713             cK_HP = Teuchos::null;
01714             cM_XHP = Teuchos::null;
01715             cM_HP = Teuchos::null;
01716             setupViews();
01717            
01718             std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.";
01719             if ( symNorm < tol )
01720             {
01721               TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
01722             }
01723             else
01724             {
01725               errMsg += " Check the operator A (or K), it may not be Hermitian!";
01726               TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
01727             }
01728           }
01729         }
01730         // compute C = C inv(R)
01731         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01732                   nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
01733         // put C(:,0:oneBlock-1) into CX
01734         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01735         // put C(:,oneBlock:twoBlocks-1) into CP
01736         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01737 
01738         // check the results
01739         if (om_->isVerbosity( Debug ) ) {
01740           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01741                                                      tmp2(oneBlock,oneBlock);
01742           MagnitudeType tmp;
01743           int teuchosret;
01744           std::stringstream os;
01745           os.precision(2);
01746           os.setf(std::ios::scientific, std::ios::floatfield);
01747 
01748           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01749 
01750           // check CX^T MM CX == I
01751           // compute tmp1 = MM*CX
01752           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01753           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01754               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01755           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01756           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01757           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01758               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01759           // subtrace tmp2 - I == CX^H * MM * CX - I
01760           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01761           tmp = tmp2.normFrobenius();          
01762           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01763 
01764           // check CP^T MM CP == I
01765           // compute tmp1 = MM*CP
01766           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01767           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01768               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01769           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01770           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01771           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01772               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01773           // subtrace tmp2 - I == CP^H * MM * CP - I
01774           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01775           tmp = tmp2.normFrobenius();          
01776           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01777 
01778           // check CX^T MM CP == 0
01779           // compute tmp1 = MM*CP
01780           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01781           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01782           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01783           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01784           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01785           // subtrace tmp2 == CX^H * MM * CP
01786           tmp = tmp2.normFrobenius();          
01787           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01788 
01789           os << std::endl;
01790           om_->print(Debug,os.str());
01791         }
01792       }
01793       else {
01794         //     [S11 ... ...]
01795         // S = [S21 ... ...]
01796         //     [S31 ... ...]
01797         //
01798         // CX = [S11]
01799         //      [S21]
01800         //      [S31]   ->  X = [X H P] CX
01801         //      
01802         // CP = [S21]   ->  P =   [H P] CP
01803         //      [S31]
01804         //
01805         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01806         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01807       }
01808 
01809       //
01810       //----------------------------------------
01811       // Compute new X and new P
01812       //----------------------------------------
01813       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01814       {
01815 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01816         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01817 #endif
01818 
01819         // if full ortho, then CX and CP are dense
01820         // we multiply [X H P]*CX into tmpMV
01821         //             [X H P]*CP into R
01822         // then put V(:,firstblock) <- tmpMV
01823         //          V(:,thirdblock) <- R
01824         //
01825         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01826         // of V, so that we can modify it before computing P
01827         // so we multiply [X H P]*CX into R
01828         //                V(:,firstblock) <- R
01829         //       multiply [H P]*CP into R
01830         //                V(:,thirdblock) <- R
01831         //
01832         // mutatis mutandis for K[XP] and M[XP]
01833         //
01834         // use SetBlock to do the assignments into V_
01835         //
01836         // in either case, views are only allowed to be overlapping
01837         // if they are const, and it should be assume that SetBlock
01838         // creates a view of the associated part
01839         //
01840         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01841         //
01842         if (fullOrtho_) {
01843           // X,P
01844           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01845           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01846           cXHP = Teuchos::null;
01847           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01848           MVT::SetBlock(*R_      ,indblock3,*V_);
01849           // KX,KP
01850           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01851           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01852           cK_XHP = Teuchos::null;
01853           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01854           MVT::SetBlock(*R_      ,indblock3,*KV_);
01855           // MX,MP
01856           if (hasM_) {
01857             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01858             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01859             cM_XHP = Teuchos::null;
01860             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01861             MVT::SetBlock(*R_      ,indblock3,*MV_);
01862           }
01863           else {
01864             cM_XHP = Teuchos::null;
01865           }
01866         }
01867         else {
01868           // X,P
01869           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01870           cXHP = Teuchos::null;
01871           MVT::SetBlock(*R_,indblock1,*V_);
01872           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01873           cHP = Teuchos::null;
01874           MVT::SetBlock(*R_,indblock3,*V_);
01875           // KX,KP
01876           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01877           cK_XHP = Teuchos::null;
01878           MVT::SetBlock(*R_,indblock1,*KV_);
01879           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01880           cK_HP = Teuchos::null;
01881           MVT::SetBlock(*R_,indblock3,*KV_);
01882           // MX,MP
01883           if (hasM_) {
01884             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01885             cM_XHP = Teuchos::null;
01886             MVT::SetBlock(*R_,indblock1,*MV_);
01887             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01888             cM_HP = Teuchos::null;
01889             MVT::SetBlock(*R_,indblock3,*MV_);
01890           }
01891           else {
01892             cM_XHP = Teuchos::null;
01893             cM_HP = Teuchos::null;
01894           }
01895         }
01896       } // end timing block
01897       // done with coefficient matrices
01898       CX = Teuchos::null;
01899       CP = Teuchos::null;
01900 
01901       //
01902       // we now have a P direction
01903       hasP_ = true;
01904 
01905       // debugging check: all of our const views should have been cleared by now
01906       // if not, we have a logic error above
01907       TEUCHOS_TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01908                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01909                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01910                           std::logic_error,
01911                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01912 
01913       //
01914       // recreate our const MV views of X,H,P and friends
01915       setupViews();
01916 
01917       //
01918       // Compute the new residuals, explicitly
01919       {
01920 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01921         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01922 #endif
01923         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01924         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01925         for (int i = 0; i < blockSize_; i++) {
01926           T(i,i) = theta_[i];
01927         }
01928         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01929       }
01930 
01931       // R has been updated; mark the norms as out-of-date
01932       Rnorms_current_ = false;
01933       R2norms_current_ = false;
01934 
01935       // When required, monitor some orthogonalities
01936       if (om_->isVerbosity( Debug ) ) {
01937         // Check almost everything here
01938         CheckList chk;
01939         chk.checkX = true;
01940         chk.checkKX = true;
01941         chk.checkMX = true;
01942         chk.checkP = true;
01943         chk.checkKP = true;
01944         chk.checkMP = true;
01945         chk.checkR = true;
01946         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01947       }
01948       else if (om_->isVerbosity( OrthoDetails )) {
01949         CheckList chk;
01950         chk.checkX = true;
01951         chk.checkP = true;
01952         chk.checkR = true;
01953         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01954       }
01955     } // end while (statusTest == false)
01956   }
01957 
01958 
01960   // compute/return residual M-norms
01961   template <class ScalarType, class MV, class OP>
01962   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01963   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01964     if (Rnorms_current_ == false) {
01965       // Update the residual norms
01966       orthman_->norm(*R_,Rnorms_);
01967       Rnorms_current_ = true;
01968     }
01969     return Rnorms_;
01970   }
01971 
01972   
01974   // compute/return residual 2-norms
01975   template <class ScalarType, class MV, class OP>
01976   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01977   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01978     if (R2norms_current_ == false) {
01979       // Update the residual 2-norms 
01980       MVT::MvNorm(*R_,R2norms_);
01981       R2norms_current_ = true;
01982     }
01983     return R2norms_;
01984   }
01985 
01986 
01987 
01988 
01990   // Check accuracy, orthogonality, and other debugging stuff
01991   // 
01992   // bools specify which tests we want to run (instead of running more than we actually care about)
01993   //
01994   // we don't bother checking the following because they are computed explicitly:
01995   //    H == Prec*R
01996   //   KH == K*H
01997   //
01998   // 
01999   // checkX : X orthonormal
02000   //          orthogonal to auxvecs
02001   // checkMX: check MX == M*X
02002   // checkKX: check KX == K*X
02003   // checkP : if fullortho P orthonormal and orthogonal to X
02004   //          orthogonal to auxvecs
02005   // checkMP: check MP == M*P
02006   // checkKP: check KP == K*P
02007   // checkH : if fullortho H orthonormal and orthogonal to X and P
02008   //          orthogonal to auxvecs
02009   // checkMH: check MH == M*H
02010   // checkR : check R orthogonal to X
02011   // checkQ : check that auxiliary vectors are actually orthonormal
02012   //
02013   // TODO: 
02014   //  add checkTheta 
02015   //
02016   template <class ScalarType, class MV, class OP>
02017   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
02018   {
02019     using std::endl;
02020 
02021     std::stringstream os;
02022     os.precision(2);
02023     os.setf(std::ios::scientific, std::ios::floatfield);
02024     MagnitudeType tmp;
02025 
02026     os << " Debugging checks: iteration " << iter_ << where << endl;
02027 
02028     // X and friends
02029     if (chk.checkX && initialized_) {
02030       tmp = orthman_->orthonormError(*X_);
02031       os << " >> Error in X^H M X == I : " << tmp << endl;
02032       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02033         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
02034         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
02035       }
02036     }
02037     if (chk.checkMX && hasM_ && initialized_) {
02038       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
02039       os << " >> Error in MX == M*X    : " << tmp << endl;
02040     }
02041     if (chk.checkKX && initialized_) {
02042       tmp = Utils::errorEquality(*X_, *KX_, Op_);
02043       os << " >> Error in KX == K*X    : " << tmp << endl;
02044     }
02045 
02046     // P and friends
02047     if (chk.checkP && hasP_ && initialized_) {
02048       if (fullOrtho_) {
02049         tmp = orthman_->orthonormError(*P_);
02050         os << " >> Error in P^H M P == I : " << tmp << endl;
02051         tmp = orthman_->orthogError(*P_,*X_);
02052         os << " >> Error in P^H M X == 0 : " << tmp << endl;
02053       }
02054       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02055         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
02056         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
02057       }
02058     }
02059     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
02060       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
02061       os << " >> Error in MP == M*P    : " << tmp << endl;
02062     }
02063     if (chk.checkKP && hasP_ && initialized_) {
02064       tmp = Utils::errorEquality(*P_, *KP_, Op_);
02065       os << " >> Error in KP == K*P    : " << tmp << endl;
02066     }
02067 
02068     // H and friends
02069     if (chk.checkH && initialized_) {
02070       if (fullOrtho_) {
02071         tmp = orthman_->orthonormError(*H_);
02072         os << " >> Error in H^H M H == I : " << tmp << endl;
02073         tmp = orthman_->orthogError(*H_,*X_);
02074         os << " >> Error in H^H M X == 0 : " << tmp << endl;
02075         if (hasP_) {
02076           tmp = orthman_->orthogError(*H_,*P_);
02077           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02078         }
02079       }
02080       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02081         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02082         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02083       }
02084     }
02085     if (chk.checkMH && hasM_ && initialized_) {
02086       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02087       os << " >> Error in MH == M*H    : " << tmp << endl;
02088     }
02089 
02090     // R: this is not M-orthogonality, but standard euclidean orthogonality
02091     if (chk.checkR && initialized_) {
02092       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02093       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02094       tmp = xTx.normFrobenius();
02095       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02096       double normR = xTx.normFrobenius();
02097       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02098     }
02099 
02100     // Q
02101     if (chk.checkQ) {
02102       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02103         tmp = orthman_->orthonormError(*auxVecs_[i]);
02104         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02105         for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
02106           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02107           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02108         }
02109       }
02110     }
02111 
02112     os << endl;
02113 
02114     return os.str();
02115   }
02116 
02117 
02119   // Print the current status of the solver
02120   template <class ScalarType, class MV, class OP>
02121   void 
02122   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02123   {
02124     using std::endl;
02125 
02126     os.setf(std::ios::scientific, std::ios::floatfield);  
02127     os.precision(6);
02128     os <<endl;
02129     os <<"================================================================================" << endl;
02130     os << endl;
02131     os <<"                              LOBPCG Solver Status" << endl;
02132     os << endl;
02133     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02134     os <<"The number of iterations performed is " << iter_       << endl;
02135     os <<"The current block size is             " << blockSize_  << endl;
02136     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02137     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02138     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02139     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02140 
02141     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02142 
02143     if (initialized_) {
02144       os << endl;
02145       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02146       os << std::setw(20) << "Eigenvalue" 
02147          << std::setw(20) << "Residual(M)"
02148          << std::setw(20) << "Residual(2)"
02149          << endl;
02150       os <<"--------------------------------------------------------------------------------"<<endl;
02151       for (int i=0; i<blockSize_; i++) {
02152         os << std::setw(20) << theta_[i];
02153         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02154         else os << std::setw(20) << "not current";
02155         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02156         else os << std::setw(20) << "not current";
02157         os << endl;
02158       }
02159     }
02160     os <<"================================================================================" << endl;
02161     os << endl;
02162   }
02163 
02165   // are we initialized or not?
02166   template <class ScalarType, class MV, class OP>
02167   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02168     return initialized_; 
02169   }
02170 
02171 
02173   // is P valid or not?
02174   template <class ScalarType, class MV, class OP>
02175   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02176     return hasP_;
02177   }
02178   
02180   // is full orthogonalization enabled or not?
02181   template <class ScalarType, class MV, class OP>
02182   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02183     return(fullOrtho_); 
02184   }
02185 
02186   
02188   // return the current auxilliary vectors
02189   template <class ScalarType, class MV, class OP>
02190   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02191     return auxVecs_;
02192   }
02193 
02195   // return the current block size
02196   template <class ScalarType, class MV, class OP>
02197   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02198     return(blockSize_); 
02199   }
02200 
02202   // return the current eigenproblem
02203   template <class ScalarType, class MV, class OP>
02204   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02205     return(*problem_); 
02206   }
02207 
02208   
02210   // return the max subspace dimension
02211   template <class ScalarType, class MV, class OP>
02212   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02213     return 3*blockSize_;
02214   }
02215 
02217   // return the current subspace dimension
02218   template <class ScalarType, class MV, class OP>
02219   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02220     if (!initialized_) return 0;
02221     return nevLocal_;
02222   }
02223 
02224   
02226   // return the current ritz residual norms
02227   template <class ScalarType, class MV, class OP>
02228   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02229   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02230   {
02231     return this->getRes2Norms();
02232   }
02233 
02234   
02236   // return the current compression indices
02237   template <class ScalarType, class MV, class OP>
02238   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02239     std::vector<int> ret(nevLocal_,0);
02240     return ret;
02241   }
02242 
02243   
02245   // return the current ritz values
02246   template <class ScalarType, class MV, class OP>
02247   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02248     std::vector<Value<ScalarType> > ret(nevLocal_);
02249     for (int i=0; i<nevLocal_; i++) {
02250       ret[i].realpart = theta_[i];
02251       ret[i].imagpart = ZERO;
02252     }
02253     return ret;
02254   }
02255 
02257   // Set a new StatusTest for the solver.
02258   template <class ScalarType, class MV, class OP>
02259   void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
02260     TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
02261         "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
02262     tester_ = test;
02263   }
02264 
02266   // Get the current StatusTest used by the solver.
02267   template <class ScalarType, class MV, class OP>
02268   Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
02269     return tester_;
02270   }
02271   
02273   // return the current ritz vectors
02274   template <class ScalarType, class MV, class OP>
02275   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02276     return X_;
02277   }
02278 
02279   
02281   // reset the iteration counter
02282   template <class ScalarType, class MV, class OP>
02283   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02284     iter_=0; 
02285   }
02286 
02287   
02289   // return the number of iterations
02290   template <class ScalarType, class MV, class OP>
02291   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02292     return(iter_); 
02293   }
02294 
02295   
02297   // return the state
02298   template <class ScalarType, class MV, class OP>
02299   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02300     LOBPCGState<ScalarType,MV> state;
02301     state.V = V_;
02302     state.KV = KV_;
02303     state.X = X_;
02304     state.KX = KX_;
02305     state.P = P_;
02306     state.KP = KP_;
02307     state.H = H_;
02308     state.KH = KH_;
02309     state.R = R_;
02310     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02311     if (hasM_) {
02312       state.MV = MV_;
02313       state.MX = MX_;
02314       state.MP = MP_;
02315       state.MH = MH_;
02316     }
02317     else {
02318       state.MX = Teuchos::null;
02319       state.MP = Teuchos::null;
02320       state.MH = Teuchos::null;
02321     }
02322     return state;
02323   }
02324 
02325 } // end Anasazi namespace
02326 
02327 #endif // ANASAZI_LOBPCG_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends