Anasazi Version of the Day
AnasaziLOBPCG.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00415     void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
00416 
00418     Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
00419 
00421     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00422 
00423 
00432     void setBlockSize(int blockSize);
00433 
00434 
00436     int getBlockSize() const;
00437 
00438 
00450     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00451 
00453     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00454 
00456 
00458 
00459 
00465     void setFullOrtho(bool fullOrtho);
00466 
00468     bool getFullOrtho() const;
00469     
00471     bool hasP();
00472 
00474     
00476 
00477 
00479     void currentStatus(std::ostream &os);
00480 
00482 
00483   private:
00484     //
00485     //
00486     //
00487     void setupViews();
00488     //
00489     // Convenience typedefs
00490     //
00491     typedef SolverUtils<ScalarType,MV,OP> Utils;
00492     typedef MultiVecTraits<ScalarType,MV> MVT;
00493     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00494     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00495     typedef typename SCT::magnitudeType MagnitudeType;
00496     const MagnitudeType ONE;  
00497     const MagnitudeType ZERO; 
00498     const MagnitudeType NANVAL;
00499     //
00500     // Internal structs
00501     //
00502     struct CheckList {
00503       bool checkX, checkMX, checkKX;
00504       bool checkH, checkMH;
00505       bool checkP, checkMP, checkKP;
00506       bool checkR, checkQ;
00507       CheckList() : checkX(false),checkMX(false),checkKX(false),
00508                     checkH(false),checkMH(false),
00509                     checkP(false),checkMP(false),checkKP(false),
00510                     checkR(false),checkQ(false) {};
00511     };
00512     //
00513     // Internal methods
00514     //
00515     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00516     //
00517     // Classes inputed through constructor that define the eigenproblem to be solved.
00518     //
00519     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00520     const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
00521     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00522     Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00523     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00524     //
00525     // Information obtained from the eigenproblem
00526     //
00527     Teuchos::RCP<const OP> Op_;
00528     Teuchos::RCP<const OP> MOp_;
00529     Teuchos::RCP<const OP> Prec_;
00530     bool hasM_;
00531     //
00532     // Internal timers
00533     //
00534     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00535                                 timerSort_, 
00536                                 timerLocalProj_, timerDS_,
00537                                 timerLocalUpdate_, timerCompRes_,
00538                                 timerOrtho_, timerInit_;
00539     //
00540     // Counters
00541     //
00542     // Number of operator applications
00543     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00544 
00545     //
00546     // Algorithmic parameters.
00547     //
00548     // blockSize_ is the solver block size
00549     int blockSize_;
00550     //
00551     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00552     // be activated (see citations at the top of this file)
00553     bool fullOrtho_;
00554 
00555     //
00556     // Current solver state
00557     //
00558     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00559     // is capable of running; _initialize is controlled  by the initialize() member method
00560     // For the implications of the state of initialized_, please see documentation for initialize()
00561     bool initialized_;
00562     //
00563     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00564     // this tells us how many of the values in theta_ are valid Ritz values
00565     int nevLocal_;
00566     //
00567     // hasP_ tells us whether there is valid data in P (and KP,MP)
00568     bool hasP_;
00569     //
00570     // State Multivecs
00571     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00572     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00573     // the rest are multivector views into V_, KV_ and MV_
00574     Teuchos::RCP<MV> X_, KX_, MX_,
00575                      H_, KH_, MH_,
00576                      P_, KP_, MP_;
00577 
00578     //
00579     // if fullOrtho_ == true, then we must produce the following on every iteration:
00580     // [newX newP] = [X H P] [CX;CP]
00581     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00582     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00583     // we must allocate additional storage for this.
00584     // otherwise, when not using full orthogonalization, the structure
00585     // [newX newP] = [X H P] [CX1  0 ]
00586     //                       [CX2 CP2]  allows us to work using only R as work space
00587     //                       [CX3 CP3] 
00588     Teuchos::RCP<MV> tmpmvec_;        
00589     // 
00590     // auxiliary vectors
00591     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00592     int numAuxVecs_;
00593     //
00594     // Number of iterations that have been performed.
00595     int iter_;
00596     // 
00597     // Current eigenvalues, residual norms
00598     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00599     // 
00600     // are the residual norms current with the residual?
00601     bool Rnorms_current_, R2norms_current_;
00602 
00603   };
00604 
00605 
00606 
00607 
00609   // Constructor
00610   template <class ScalarType, class MV, class OP>
00611   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00612         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00613         const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00614         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00615         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00616         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00617         Teuchos::ParameterList &params
00618         ) :
00619     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00620     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00621     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00622     // problem, tools
00623     problem_(problem), 
00624     sm_(sorter),
00625     om_(printer),
00626     tester_(tester),
00627     orthman_(ortho),
00628     // timers, counters
00629 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00630     timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")),
00631     timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")),
00632     timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")),
00633     timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")),
00634     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")),
00635     timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")),
00636     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")),
00637     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")),
00638     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")),
00639     timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")),
00640 #endif
00641     count_ApplyOp_(0),
00642     count_ApplyM_(0),
00643     count_ApplyPrec_(0),
00644     // internal data
00645     blockSize_(0),
00646     fullOrtho_(params.get("Full Ortho", true)),
00647     initialized_(false),
00648     nevLocal_(0),
00649     hasP_(false),
00650     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00651     numAuxVecs_(0),
00652     iter_(0),
00653     Rnorms_current_(false),
00654     R2norms_current_(false)
00655   {     
00656     TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00657                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00658     TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00659                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00660     TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00661                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00662     TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00663                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00664     TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00665                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00666     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00667                        "Anasazi::LOBPCG::constructor: problem is not set.");
00668     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00669                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00670 
00671     // get the problem operators
00672     Op_   = problem_->getOperator();
00673     TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00674                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00675     MOp_  = problem_->getM();
00676     Prec_ = problem_->getPrec();
00677     hasM_ = (MOp_ != Teuchos::null);
00678 
00679     // set the block size and allocate data
00680     int bs = params.get("Block Size", problem_->getNEV());
00681     setBlockSize(bs);
00682   }
00683 
00684 
00686   // Set the block size and make necessary adjustments.
00687   template <class ScalarType, class MV, class OP>
00688   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00689   {
00690     // time spent here counts towards timerInit_
00691 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00692     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00693 #endif
00694 
00695     // This routine only allocates space; it doesn't not perform any computation
00696     // if size is decreased, take the first newBS vectors of all and leave state as is
00697     // otherwise, grow/allocate space and set solver to unitialized
00698 
00699     Teuchos::RCP<const MV> tmp;
00700     // grab some Multivector to Clone
00701     // in practice, getInitVec() should always provide this, but it is possible to use a 
00702     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00703     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00704     // than V_, and we don't want to keep V_ around longer than necessary
00705     if (blockSize_ > 0) {
00706       tmp = R_;
00707     }
00708     else {
00709       tmp = problem_->getInitVec();
00710       TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00711                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00712     }
00713     
00714     TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || newBS > MVT::GetVecLength(*tmp), std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00715     if (newBS == blockSize_) {
00716       // do nothing
00717       return;
00718     }
00719     else if (newBS < blockSize_ && initialized_) {
00720       //
00721       // shrink vectors
00722 
00723       // release views so we can modify the bases
00724       X_ = Teuchos::null;
00725       KX_ = Teuchos::null;
00726       MX_ = Teuchos::null;
00727       H_ = Teuchos::null;
00728       KH_ = Teuchos::null;
00729       MH_ = Teuchos::null;
00730       P_ = Teuchos::null;
00731       KP_ = Teuchos::null;
00732       MP_ = Teuchos::null;
00733 
00734       // make new indices vectors
00735       std::vector<int> newind(newBS), oldind(newBS);
00736       for (int i=0; i<newBS; i++) {
00737         newind[i] = i;
00738         oldind[i] = i;
00739       }
00740 
00741       Teuchos::RCP<MV> newV, newMV, newKV, newR;
00742       Teuchos::RCP<const MV> src;
00743       // allocate R and newV
00744       newR = MVT::Clone(*tmp,newBS);
00745       newV = MVT::Clone(*tmp,newBS*3);
00746       newKV = MVT::Clone(*tmp,newBS*3);
00747       if (hasM_) {
00748         newMV = MVT::Clone(*tmp,newBS*3);
00749       }
00750 
00751       //
00752       // if we are initialized, we want to pull the data from V_ into newV:
00753       //           bs  |  bs  | bs 
00754       // newV  = [newX | **** |newP ]
00755       // newKV = [newKX| **** |newKP]
00756       // newMV = [newMX| **** |newMP]
00757       // where 
00758       //          oldbs   |  oldbs  |   oldbs   
00759       //  V_ = [newX  *** | ******* | newP  ***]
00760       // KV_ = [newKX *** | ******* | newKP ***]
00761       // MV_ = [newMX *** | ******* | newMP ***]
00762       //
00763       // we don't care to copy the data corresponding to H
00764       // we will not copy the M data if !hasM_, because it doesn't exist
00765       //
00766 
00767       // these are shrink operations which preserve their data
00768       theta_.resize(3*newBS);
00769       Rnorms_.resize(newBS);
00770       R2norms_.resize(newBS);
00771 
00772       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00773       src = MVT::CloneView(*R_,newind);
00774       MVT::SetBlock(*src,newind,*newR);
00775       // free old memory and point to new memory
00776       R_ = newR;
00777 
00778       // copy in order: newX newKX newMX, then newP newKP newMP
00779       // for  X: [0,bs-1] <-- [0,bs-1] 
00780       src = MVT::CloneView(*V_,oldind);
00781       MVT::SetBlock(*src,newind,*newV);
00782       src = MVT::CloneView(*KV_,oldind);
00783       MVT::SetBlock(*src,newind,*newKV);
00784       if (hasM_) {
00785         src = MVT::CloneView(*MV_,oldind);
00786         MVT::SetBlock(*src,newind,*newMV);
00787       }
00788       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00789       for (int i=0; i<newBS; i++) {
00790         newind[i] += 2*newBS;
00791         oldind[i] += 2*blockSize_;
00792       }
00793       src = MVT::CloneView(*V_,oldind);
00794       MVT::SetBlock(*src,newind,*newV);
00795       src = MVT::CloneView(*KV_,oldind);
00796       MVT::SetBlock(*src,newind,*newKV);
00797       if (hasM_) {
00798         src = MVT::CloneView(*MV_,oldind);
00799         MVT::SetBlock(*src,newind,*newMV);
00800       }
00801 
00802       // release temp view
00803       src = Teuchos::null;
00804 
00805       // release old allocations and point at new memory
00806       V_ = newV;
00807       KV_ = newKV;
00808       if (hasM_) {
00809         MV_ = newMV;
00810       }
00811       else {
00812         MV_ = V_;
00813       }
00814     }
00815     else {  
00816       // newBS > blockSize_  or  not initialized
00817       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00818       initialized_ = false;
00819       hasP_ = false;
00820 
00821       // release views
00822       X_ = Teuchos::null;
00823       KX_ = Teuchos::null;
00824       MX_ = Teuchos::null;
00825       H_ = Teuchos::null;
00826       KH_ = Teuchos::null;
00827       MH_ = Teuchos::null;
00828       P_ = Teuchos::null;
00829       KP_ = Teuchos::null;
00830       MP_ = Teuchos::null;
00831 
00832       // free allocated storage
00833       R_ = Teuchos::null;
00834       V_ = Teuchos::null;
00835 
00836       // allocate scalar vectors
00837       theta_.resize(3*newBS,NANVAL);
00838       Rnorms_.resize(newBS,NANVAL);
00839       R2norms_.resize(newBS,NANVAL);
00840       
00841       // clone multivectors off of tmp
00842       R_ = MVT::Clone(*tmp,newBS);
00843       V_ = MVT::Clone(*tmp,newBS*3);
00844       KV_ = MVT::Clone(*tmp,newBS*3);
00845       if (hasM_) {
00846         MV_ = MVT::Clone(*tmp,newBS*3);
00847       }
00848       else {
00849         MV_ = V_;
00850       }
00851     }
00852 
00853     // allocate tmp space
00854     tmpmvec_ = Teuchos::null;
00855     if (fullOrtho_) {
00856       tmpmvec_ = MVT::Clone(*tmp,newBS);
00857     }
00858 
00859     // set new block size
00860     blockSize_ = newBS;
00861 
00862     // setup new views
00863     setupViews();
00864   }
00865 
00866 
00868   // Setup views into V,KV,MV
00869   template <class ScalarType, class MV, class OP>
00870   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00871   {
00872     std::vector<int> ind(blockSize_);
00873 
00874     for (int i=0; i<blockSize_; i++) {
00875       ind[i] = i;
00876     }
00877     X_  = MVT::CloneViewNonConst(*V_,ind);
00878     KX_ = MVT::CloneViewNonConst(*KV_,ind);
00879     if (hasM_) {
00880       MX_ = MVT::CloneViewNonConst(*MV_,ind);
00881     }
00882     else {
00883       MX_ = X_;
00884     }
00885 
00886     for (int i=0; i<blockSize_; i++) {
00887       ind[i] += blockSize_;
00888     }
00889     H_  = MVT::CloneViewNonConst(*V_,ind);
00890     KH_ = MVT::CloneViewNonConst(*KV_,ind);
00891     if (hasM_) {
00892       MH_ = MVT::CloneViewNonConst(*MV_,ind);
00893     }
00894     else {
00895       MH_ = H_;
00896     }
00897 
00898     for (int i=0; i<blockSize_; i++) {
00899       ind[i] += blockSize_;
00900     }
00901     P_  = MVT::CloneViewNonConst(*V_,ind);
00902     KP_ = MVT::CloneViewNonConst(*KV_,ind);
00903     if (hasM_) {
00904       MP_ = MVT::CloneViewNonConst(*MV_,ind);
00905     }
00906     else {
00907       MP_ = P_;
00908     }
00909   }
00910 
00911 
00913   // Set the auxiliary vectors
00914   template <class ScalarType, class MV, class OP>
00915   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00916     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00917 
00918     // set new auxiliary vectors
00919     auxVecs_ = auxvecs;
00920     
00921     numAuxVecs_ = 0;
00922     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00923       numAuxVecs_ += MVT::GetNumberVecs(**i);
00924     }
00925     
00926     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00927     if (numAuxVecs_ > 0 && initialized_) {
00928       initialized_ = false;
00929       hasP_ = false;
00930     }
00931 
00932     if (om_->isVerbosity( Debug ) ) {
00933       // Check almost everything here
00934       CheckList chk;
00935       chk.checkQ = true;
00936       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00937     }
00938   }
00939 
00940 
00942   /* Initialize the state of the solver
00943    * 
00944    * POST-CONDITIONS:
00945    *
00946    * initialized_ == true
00947    * X is orthonormal, orthogonal to auxVecs_
00948    * KX = Op*X
00949    * MX = M*X if hasM_
00950    * theta_ contains Ritz values of X
00951    * R = KX - MX*diag(theta_)
00952    * if hasP() == true,
00953    *   P orthogonal to auxVecs_
00954    *   if fullOrtho_ == true,
00955    *     P orthonormal and orthogonal to X
00956    *   KP = Op*P
00957    *   MP = M*P
00958    */
00959   template <class ScalarType, class MV, class OP>
00960   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00961   {
00962     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00963     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00964 
00965 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00966     Teuchos::TimeMonitor inittimer( *timerInit_ );
00967 #endif
00968 
00969     std::vector<int> bsind(blockSize_);
00970     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00971 
00972     // in LOBPCG, X (the subspace iterate) is primary
00973     // the order of dependence follows like so.
00974     // --init->                 X
00975     //    --op apply->          MX,KX
00976     //       --ritz analysis->  theta
00977     //          --optional->    P,MP,KP
00978     // 
00979     // if the user specifies all data for a level, we will accept it.
00980     // otherwise, we will generate the whole level, and all subsequent levels.
00981     //
00982     // the data members are ordered based on dependence, and the levels are
00983     // partitioned according to the amount of work required to produce the
00984     // items in a level.
00985     //
00986     // inconsitent multivectors widths and lengths will not be tolerated, and
00987     // will be treated with exceptions.
00988 
00989     // set up X, KX, MX: get them from "state" if user specified them
00990 
00991     //----------------------------------------
00992     // set up X, MX, KX
00993     //----------------------------------------
00994     if (newstate.X != Teuchos::null) {
00995       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.X) != MVT::GetVecLength(*X_),
00996                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00997       // newstate.X must have blockSize_ vectors; any more will be ignored
00998       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
00999                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
01000 
01001       // put X data in X_
01002       MVT::SetBlock(*newstate.X,bsind,*X_);
01003 
01004       // put MX data in MX_
01005       if (hasM_) {
01006         if (newstate.MX != Teuchos::null) {
01007           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MX) != MVT::GetVecLength(*MX_),
01008                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
01009           // newstate.MX must have blockSize_ vectors; any more will be ignored
01010           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
01011                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01012           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01013         }
01014         else {
01015           // user didn't specify MX, compute it
01016           {
01017 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01018             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01019 #endif
01020             OPT::Apply(*MOp_,*X_,*MX_);
01021             count_ApplyM_ += blockSize_;
01022           }
01023           // we generated MX; we will generate R as well
01024           newstate.R = Teuchos::null;
01025         }
01026       }
01027   
01028       // put data in KX
01029       if (newstate.KX != Teuchos::null) {
01030         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KX) != MVT::GetVecLength(*KX_),
01031                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01032         // newstate.KX must have blockSize_ vectors; any more will be ignored
01033         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01034                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01035         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01036       }
01037       else {
01038         // user didn't specify KX, compute it
01039         {
01040 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01041           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01042 #endif
01043           OPT::Apply(*Op_,*X_,*KX_);
01044           count_ApplyOp_ += blockSize_;
01045         }
01046         // we generated KX; we will generate R as well
01047         newstate.R = Teuchos::null;
01048       }
01049     }
01050     else {
01051       // user did not specify X
01052       // we will initialize X, compute KX and MX, and compute R
01053       //
01054       // clear state so we won't use any data from it below
01055       newstate.P  = Teuchos::null;
01056       newstate.KP = Teuchos::null;
01057       newstate.MP = Teuchos::null;
01058       newstate.R  = Teuchos::null;
01059       newstate.T  = Teuchos::null;
01060 
01061       // generate a basis and projectAndNormalize
01062       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01063       TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01064                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01065 
01066       int initSize = MVT::GetNumberVecs(*ivec);
01067       if (initSize > blockSize_) {
01068         // we need only the first blockSize_ vectors from ivec; get a view of them
01069         initSize = blockSize_;
01070         std::vector<int> ind(blockSize_);
01071         for (int i=0; i<blockSize_; i++) ind[i] = i;
01072         ivec = MVT::CloneView(*ivec,ind);
01073       }
01074 
01075       // assign ivec to first part of X_ 
01076       if (initSize > 0) {
01077         std::vector<int> ind(initSize);
01078         for (int i=0; i<initSize; i++) ind[i] = i;
01079         MVT::SetBlock(*ivec,ind,*X_);
01080       }
01081       // fill the rest of X_ with random
01082       if (blockSize_ > initSize) {
01083         std::vector<int> ind(blockSize_ - initSize);
01084         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01085         Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
01086         MVT::MvRandom(*rX);
01087         rX = Teuchos::null;
01088       }
01089 
01090       // put data in MX
01091       if (hasM_) {
01092 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01093         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01094 #endif
01095         OPT::Apply(*MOp_,*X_,*MX_);
01096         count_ApplyM_ += blockSize_;
01097       }
01098   
01099       // remove auxVecs from X_ and normalize it
01100       if (numAuxVecs_ > 0) {
01101 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01102         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01103 #endif
01104         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01105         int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
01106         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01107                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01108       }
01109       else {
01110 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01111         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01112 #endif
01113         int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
01114         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01115                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01116       }
01117 
01118       // put data in KX
01119       {
01120 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01121         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01122 #endif
01123         OPT::Apply(*Op_,*X_,*KX_);
01124         count_ApplyOp_ += blockSize_;
01125       }
01126     } // end if (newstate.X != Teuchos::null)
01127 
01128 
01129     //----------------------------------------
01130     // set up Ritz values
01131     //----------------------------------------
01132     theta_.resize(3*blockSize_,NANVAL);
01133     if (newstate.T != Teuchos::null) {
01134       TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01135                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01136       for (int i=0; i<blockSize_; i++) {
01137         theta_[i] = (*newstate.T)[i];
01138       }
01139       nevLocal_ = blockSize_;
01140     }
01141     else {
01142       // get ritz vecs/vals
01143       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01144                                                  MM(blockSize_,blockSize_),
01145                                                   S(blockSize_,blockSize_);
01146       {
01147 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01148         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01149 #endif
01150         // project K
01151         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01152         // project M
01153         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01154         nevLocal_ = blockSize_;
01155       }
01156 
01157       // solve the projected problem
01158       {
01159 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01160         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01161 #endif
01162         Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
01163         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01164                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01165       }
01166 
01167       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01168       // However, we still require them to be ordered correctly
01169       {
01170 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01171         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01172 #endif
01173 
01174         std::vector<int> order(blockSize_);
01175         // 
01176         // sort the first blockSize_ values in theta_
01177         sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_);   // don't catch exception
01178         //
01179         // apply the same ordering to the primitive ritz vectors
01180         Utils::permuteVectors(order,S);
01181       }
01182 
01183       // update the solution, use R for storage
01184       {
01185 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01186         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01187 #endif
01188         // X <- X*S
01189         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01190         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01191         // KX <- KX*S
01192         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01193         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01194         if (hasM_) {
01195           // MX <- MX*S
01196           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01197           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01198         }
01199       }
01200     }
01201 
01202     //----------------------------------------
01203     // compute R
01204     //----------------------------------------
01205     if (newstate.R != Teuchos::null) {
01206       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.R) != MVT::GetVecLength(*R_),
01207                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01208       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01209                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01210       MVT::SetBlock(*newstate.R,bsind,*R_);
01211     }
01212     else {
01213 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01214       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01215 #endif
01216       // form R <- KX - MX*T
01217       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01218       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01219       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01220       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01221     }
01222 
01223     // R has been updated; mark the norms as out-of-date
01224     Rnorms_current_ = false;
01225     R2norms_current_ = false;
01226   
01227     // put data in P,KP,MP: P is not used to set theta
01228     if (newstate.P != Teuchos::null) {
01229       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01230                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01231       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.P) != MVT::GetVecLength(*P_),
01232                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01233       hasP_ = true;
01234 
01235       // set P_
01236       MVT::SetBlock(*newstate.P,bsind,*P_);
01237 
01238       // set/compute KP_
01239       if (newstate.KP != Teuchos::null) {
01240         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01241                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01242         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.KP) != MVT::GetVecLength(*KP_),
01243                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01244         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01245       }
01246       else {
01247 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01248         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01249 #endif
01250         OPT::Apply(*Op_,*P_,*KP_);
01251         count_ApplyOp_ += blockSize_;
01252       }
01253 
01254       // set/compute MP_
01255       if (hasM_) {
01256         if (newstate.MP != Teuchos::null) {
01257           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01258                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01259           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetVecLength(*newstate.MP) != MVT::GetVecLength(*MP_),
01260                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01261           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01262         }
01263         else {
01264 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01265           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01266 #endif
01267           OPT::Apply(*MOp_,*P_,*MP_);
01268           count_ApplyM_ += blockSize_;
01269         }
01270       }
01271     }
01272     else {
01273       hasP_ = false;
01274     }
01275 
01276     // finally, we are initialized
01277     initialized_ = true;
01278 
01279     if (om_->isVerbosity( Debug ) ) {
01280       // Check almost everything here
01281       CheckList chk;
01282       chk.checkX = true;
01283       chk.checkKX = true;
01284       chk.checkMX = true;
01285       chk.checkP = true;
01286       chk.checkKP = true;
01287       chk.checkMP = true;
01288       chk.checkR = true;
01289       chk.checkQ = true;
01290       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01291     }
01292 
01293   }
01294 
01295   template <class ScalarType, class MV, class OP>
01296   void LOBPCG<ScalarType,MV,OP>::initialize()
01297   {
01298     LOBPCGState<ScalarType,MV> empty;
01299     initialize(empty);
01300   }
01301 
01302 
01304   // Instruct the solver to use full orthogonalization
01305   template <class ScalarType, class MV, class OP>
01306   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01307   {
01308     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01309       // state is already orthogonalized or solver is not initialized
01310       fullOrtho_ = fullOrtho;
01311     }
01312     else {
01313       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01314       // ergo, we must throw away data in P
01315       fullOrtho_ = true;
01316       hasP_ = false;
01317     }
01318 
01319     // the user has called setFullOrtho, so the class has been instantiated
01320     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01321     // if it is already allocated, it should be the proper size
01322     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01323       // allocated the workspace
01324       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01325     }
01326     else if (fullOrtho_==false) {
01327       // free the workspace
01328       tmpmvec_ = Teuchos::null;
01329     }
01330   }
01331 
01332 
01334   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01335   template <class ScalarType, class MV, class OP>
01336   void LOBPCG<ScalarType,MV,OP>::iterate () 
01337   {
01338     //
01339     // Allocate/initialize data structures
01340     //
01341     if (initialized_ == false) {
01342       initialize();
01343     }
01344 
01345     //
01346     // Miscellaneous definitions
01347     const int oneBlock    =   blockSize_;
01348     const int twoBlocks   = 2*blockSize_;
01349     const int threeBlocks = 3*blockSize_;
01350 
01351     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01352     for (int i=0; i<blockSize_; i++) {
01353       indblock1[i] = i;
01354       indblock2[i] = i +  blockSize_;
01355       indblock3[i] = i + 2*blockSize_;
01356     }
01357 
01358     //
01359     // Define dense projected/local matrices
01360     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01361     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01362     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01363     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01364                                                MM( threeBlocks, threeBlocks ),
01365                                                 S( threeBlocks, threeBlocks );
01366 
01367     while (tester_->checkStatus(this) != Passed) {
01368 
01369       // Print information on current status
01370       if (om_->isVerbosity(Debug)) {
01371         currentStatus( om_->stream(Debug) );
01372       }
01373       else if (om_->isVerbosity(IterationDetails)) {
01374         currentStatus( om_->stream(IterationDetails) );
01375       }
01376 
01377       // increment iteration counter
01378       iter_++;
01379 
01380       // Apply the preconditioner on the residuals: H <- Prec*R
01381       if (Prec_ != Teuchos::null) {
01382 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01383         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01384 #endif
01385         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01386         count_ApplyPrec_ += blockSize_;
01387       }
01388       else {
01389         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01390       }
01391 
01392       // Apply the mass matrix on H
01393       if (hasM_) {
01394 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01395         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01396 #endif
01397         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01398         count_ApplyM_ += blockSize_;
01399       }
01400 
01401       // orthogonalize H against the auxiliary vectors
01402       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01403       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01404       Q = auxVecs_;
01405       if (fullOrtho_) {
01406         // X and P are not contiguous, so there is not much point in putting them under 
01407         // a single multivector view
01408         Q.push_back(X_);
01409         if (hasP_) {
01410           Q.push_back(P_);
01411         }
01412       }
01413       {
01414 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01415         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01416 #endif
01417         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 
01418           Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01419         int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
01420         // our views are currently in place; it is safe to throw an exception
01421         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01422                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01423       }
01424 
01425       if (om_->isVerbosity( Debug ) ) {
01426         CheckList chk;
01427         chk.checkH = true;
01428         chk.checkMH = true;
01429         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01430       }
01431       else if (om_->isVerbosity( OrthoDetails ) ) {
01432         CheckList chk;
01433         chk.checkH = true;
01434         chk.checkMH = true;
01435         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01436       }
01437 
01438       // Apply the stiffness matrix to H
01439       {
01440 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01441         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01442 #endif
01443         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01444         count_ApplyOp_ += blockSize_;
01445       }
01446 
01447       if (hasP_) {
01448         nevLocal_ = threeBlocks;
01449       }
01450       else {
01451         nevLocal_ = twoBlocks;
01452       }
01453 
01454       //
01455       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01456       // we need to perform the following operations:
01457       //    X' [KX KH KP]
01458       //    X' [MX MH MP]
01459       //    H' [KH KP]
01460       //    H' [MH MP]
01461       //    P' [KP]
01462       //    P' [MP]
01463       //    [X H P] CX
01464       //    [X H P] CP    if  fullOrtho
01465       //      [H P] CP    if !fullOrtho
01466       //
01467       // since M[X H P] is potentially the same memory as [X H P], and 
01468       // because we are not allowed to have overlapping non-const views of 
01469       // a multivector, we will now abandon our non-const views in favor of 
01470       // const views
01471       //
01472       X_ = Teuchos::null;
01473       KX_ = Teuchos::null;
01474       MX_ = Teuchos::null;
01475       H_ = Teuchos::null;
01476       KH_ = Teuchos::null;
01477       MH_ = Teuchos::null;
01478       P_ = Teuchos::null;
01479       KP_ = Teuchos::null;
01480       MP_ = Teuchos::null;
01481       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01482       {
01483         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01484         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01485 
01486         std::vector<int> indXHP(nevLocal_);
01487         for (int i=0; i<nevLocal_; i++) {
01488           indXHP[i] = i;
01489         }
01490         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01491         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01492         if (hasM_) {
01493           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01494         }
01495         else {
01496           cM_XHP = cXHP;
01497         }
01498 
01499         std::vector<int> indHP(nevLocal_-blockSize_);
01500         for (int i=blockSize_; i<nevLocal_; i++) {
01501           indHP[i-blockSize_] = i;
01502         }
01503         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01504         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01505         if (hasM_) {
01506           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01507         }
01508         else {
01509           cM_HP = cHP;
01510         }
01511 
01512         if (nevLocal_ == threeBlocks) {
01513           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01514           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01515           if (hasM_) {
01516             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01517           }
01518           else {
01519             cM_P = cP;
01520           }
01521         }
01522       }
01523 
01524       //
01525       //----------------------------------------
01526       // Form "local" mass and stiffness matrices
01527       //----------------------------------------
01528       {
01529         // We will form only the block upper triangular part of 
01530         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01531         // Get the necessary views into KK and MM:
01532         //      [--K1--]        [--M1--]
01533         // KK = [  -K2-]   MM = [  -M2-]
01534         //      [    K3]        [    M3]
01535         // 
01536         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01537         //
01538         Teuchos::SerialDenseMatrix<int,ScalarType> 
01539           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01540           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01541           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01542           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01543           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01544           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01545         {
01546 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01547           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01548 #endif
01549           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01550           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01551           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01552           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01553           if (nevLocal_ == threeBlocks) {
01554             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01555             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01556           }
01557         }
01558       }
01559       // below, we only need bases [X H P] and [H P] and friends
01560       // furthermore, we only need [H P] and friends if fullOrtho == false
01561       // clear the others now
01562       cX   = Teuchos::null;
01563       cH   = Teuchos::null;
01564       cP   = Teuchos::null;
01565       cK_P = Teuchos::null;
01566       cM_P = Teuchos::null;
01567       if (fullOrtho_ == true) {
01568         cHP = Teuchos::null;
01569         cK_HP = Teuchos::null;
01570         cM_HP = Teuchos::null;
01571       }
01572 
01573       //
01574       //---------------------------------------------------
01575       // Perform a spectral decomposition of (KK,MM)
01576       //---------------------------------------------------
01577       //
01578       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01579       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01580                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01581                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01582       {
01583 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01584         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01585 #endif
01586         int localSize = nevLocal_;
01587         Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
01588         // localSize tells directSolver() how big KK,MM are
01589         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01590         // because of loss of MM-orthogonality in the projected eigenvectors
01591         // nevLocal_ tells us how much it used, telling us the effective localSize 
01592         // (i.e., how much of KK,MM used by directSolver)
01593         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01594         // detected by directSolver
01595         //
01596         if (nevLocal_ != localSize) {
01597           // before throwing the exception, and thereby leaving iterate(), setup the views again
01598           // first, clear the const views
01599           cXHP   = Teuchos::null;
01600           cK_XHP = Teuchos::null;
01601           cM_XHP = Teuchos::null;
01602           cHP    = Teuchos::null;
01603           cK_HP  = Teuchos::null;
01604           cM_HP  = Teuchos::null;
01605           setupViews();
01606         }
01607         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01608             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01609       }
01610 
01611       //
01612       //---------------------------------------------------
01613       // Sort the ritz values using the sort manager
01614       //---------------------------------------------------
01615       Teuchos::LAPACK<int,ScalarType> lapack;
01616       Teuchos::BLAS<int,ScalarType> blas;
01617       {
01618 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01619         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01620 #endif
01621 
01622         std::vector<int> order(nevLocal_);
01623         // 
01624         // Sort the first nevLocal_ values in theta_
01625         sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_);   // don't catch exception
01626         //
01627         // Sort the primitive ritz vectors
01628         Utils::permuteVectors(order,lclS);
01629       }
01630 
01631       //
01632       //----------------------------------------
01633       // Compute coefficients for X and P under [X H P]
01634       //----------------------------------------
01635       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01636       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01637       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01638       // Here, we will also orthonormalize CX.
01639       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01640       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01641       if (fullOrtho_) {
01642         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01643         //                             ( S21 )                          ( S21 )
01644         //                             ( S31 )                          ( S31 )
01645         // Do this using Cholesky factorization of ( S11  0  )
01646         //                                         ( S21 S21 )
01647         //                                         ( S31 S31 )
01648         //           ( S11  0  )
01649         // Build C = ( S21 S21 )
01650         //           ( S31 S31 )
01651         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01652                                                 MMC(nevLocal_,twoBlocks),
01653                                                 CMMC(twoBlocks  ,twoBlocks);
01654 
01655         // first block of rows: ( S11 0 )
01656         for (int j=0; j<oneBlock; j++) {
01657           for (int i=0; i<oneBlock; i++) {
01658             // CX
01659             C(i,j) = lclS(i,j);
01660             // CP
01661             C(i,j+oneBlock) = ZERO;
01662           }
01663         }
01664         // second block of rows: (S21 S21)
01665         for (int j=0; j<oneBlock; j++) {
01666           for (int i=oneBlock; i<twoBlocks; i++) {
01667             // CX
01668             C(i,j)          = lclS(i,j);
01669             // CP
01670             C(i,j+oneBlock) = lclS(i,j);
01671           }
01672         }
01673         // third block of rows
01674         if (nevLocal_ == threeBlocks) {
01675           for (int j=0; j<oneBlock; j++) {
01676             for (int i=twoBlocks; i<threeBlocks; i++) {
01677               // CX
01678               C(i,j)          = lclS(i,j);
01679               // CP
01680               C(i,j+oneBlock) = lclS(i,j);
01681             }
01682           }
01683         }
01684 
01685         // compute MMC = lclMM*C
01686         {
01687           int teuchosret;
01688           teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01689           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01690               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01691           // compute CMMC = C^H*MMC == C^H*lclMM*C
01692           teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
01693           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01694               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01695         }
01696 
01697         // compute R (cholesky) of CMMC
01698         {
01699           int info;
01700           lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
01701           // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01702           if (info != 0) {
01703             cXHP = Teuchos::null;
01704             cHP = Teuchos::null;
01705             cK_XHP = Teuchos::null;
01706             cK_HP = Teuchos::null;
01707             cM_XHP = Teuchos::null;
01708             cM_HP = Teuchos::null;
01709             setupViews();
01710           }
01711           TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, 
01712               "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.");
01713         }
01714         // compute C = C inv(R)
01715         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01716                   nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
01717         // put C(:,0:oneBlock-1) into CX
01718         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01719         // put C(:,oneBlock:twoBlocks-1) into CP
01720         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01721 
01722         // check the results
01723         if (om_->isVerbosity( Debug ) ) {
01724           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01725                                                      tmp2(oneBlock,oneBlock);
01726           MagnitudeType tmp;
01727           int teuchosret;
01728           std::stringstream os;
01729           os.precision(2);
01730           os.setf(std::ios::scientific, std::ios::floatfield);
01731 
01732           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01733 
01734           // check CX^T MM CX == I
01735           // compute tmp1 = MM*CX
01736           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01737           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01738               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01739           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01740           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01741           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01742               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01743           // subtrace tmp2 - I == CX^H * MM * CX - I
01744           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01745           tmp = tmp2.normFrobenius();          
01746           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01747 
01748           // check CP^T MM CP == I
01749           // compute tmp1 = MM*CP
01750           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01751           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01752               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01753           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01754           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01755           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01756               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01757           // subtrace tmp2 - I == CP^H * MM * CP - I
01758           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01759           tmp = tmp2.normFrobenius();          
01760           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01761 
01762           // check CX^T MM CP == 0
01763           // compute tmp1 = MM*CP
01764           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01765           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01766           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01767           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01768           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01769           // subtrace tmp2 == CX^H * MM * CP
01770           tmp = tmp2.normFrobenius();          
01771           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01772 
01773           os << std::endl;
01774           om_->print(Debug,os.str());
01775         }
01776       }
01777       else {
01778         //     [S11 ... ...]
01779         // S = [S21 ... ...]
01780         //     [S31 ... ...]
01781         //
01782         // CX = [S11]
01783         //      [S21]
01784         //      [S31]   ->  X = [X H P] CX
01785         //      
01786         // CP = [S21]   ->  P =   [H P] CP
01787         //      [S31]
01788         //
01789         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01790         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01791       }
01792 
01793       //
01794       //----------------------------------------
01795       // Compute new X and new P
01796       //----------------------------------------
01797       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01798       {
01799 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01800         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01801 #endif
01802 
01803         // if full ortho, then CX and CP are dense
01804         // we multiply [X H P]*CX into tmpMV
01805         //             [X H P]*CP into R
01806         // then put V(:,firstblock) <- tmpMV
01807         //          V(:,thirdblock) <- R
01808         //
01809         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01810         // of V, so that we can modify it before computing P
01811         // so we multiply [X H P]*CX into R
01812         //                V(:,firstblock) <- R
01813         //       multiply [H P]*CP into R
01814         //                V(:,thirdblock) <- R
01815         //
01816         // mutatis mutandis for K[XP] and M[XP]
01817         //
01818         // use SetBlock to do the assignments into V_
01819         //
01820         // in either case, views are only allowed to be overlapping
01821         // if they are const, and it should be assume that SetBlock
01822         // creates a view of the associated part
01823         //
01824         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01825         //
01826         if (fullOrtho_) {
01827           // X,P
01828           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01829           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01830           cXHP = Teuchos::null;
01831           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01832           MVT::SetBlock(*R_      ,indblock3,*V_);
01833           // KX,KP
01834           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01835           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01836           cK_XHP = Teuchos::null;
01837           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01838           MVT::SetBlock(*R_      ,indblock3,*KV_);
01839           // MX,MP
01840           if (hasM_) {
01841             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01842             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01843             cM_XHP = Teuchos::null;
01844             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01845             MVT::SetBlock(*R_      ,indblock3,*MV_);
01846           }
01847           else {
01848             cM_XHP = Teuchos::null;
01849           }
01850         }
01851         else {
01852           // X,P
01853           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01854           cXHP = Teuchos::null;
01855           MVT::SetBlock(*R_,indblock1,*V_);
01856           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01857           cHP = Teuchos::null;
01858           MVT::SetBlock(*R_,indblock3,*V_);
01859           // KX,KP
01860           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01861           cK_XHP = Teuchos::null;
01862           MVT::SetBlock(*R_,indblock1,*KV_);
01863           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01864           cK_HP = Teuchos::null;
01865           MVT::SetBlock(*R_,indblock3,*KV_);
01866           // MX,MP
01867           if (hasM_) {
01868             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01869             cM_XHP = Teuchos::null;
01870             MVT::SetBlock(*R_,indblock1,*MV_);
01871             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01872             cM_HP = Teuchos::null;
01873             MVT::SetBlock(*R_,indblock3,*MV_);
01874           }
01875           else {
01876             cM_XHP = Teuchos::null;
01877             cM_HP = Teuchos::null;
01878           }
01879         }
01880       } // end timing block
01881       // done with coefficient matrices
01882       CX = Teuchos::null;
01883       CP = Teuchos::null;
01884 
01885       //
01886       // we now have a P direction
01887       hasP_ = true;
01888 
01889       // debugging check: all of our const views should have been cleared by now
01890       // if not, we have a logic error above
01891       TEUCHOS_TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01892                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01893                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01894                           std::logic_error,
01895                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01896 
01897       //
01898       // recreate our const MV views of X,H,P and friends
01899       setupViews();
01900 
01901       //
01902       // Compute the new residuals, explicitly
01903       {
01904 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01905         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01906 #endif
01907         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01908         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01909         for (int i = 0; i < blockSize_; i++) {
01910           T(i,i) = theta_[i];
01911         }
01912         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01913       }
01914 
01915       // R has been updated; mark the norms as out-of-date
01916       Rnorms_current_ = false;
01917       R2norms_current_ = false;
01918 
01919       // When required, monitor some orthogonalities
01920       if (om_->isVerbosity( Debug ) ) {
01921         // Check almost everything here
01922         CheckList chk;
01923         chk.checkX = true;
01924         chk.checkKX = true;
01925         chk.checkMX = true;
01926         chk.checkP = true;
01927         chk.checkKP = true;
01928         chk.checkMP = true;
01929         chk.checkR = true;
01930         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01931       }
01932       else if (om_->isVerbosity( OrthoDetails )) {
01933         CheckList chk;
01934         chk.checkX = true;
01935         chk.checkP = true;
01936         chk.checkR = true;
01937         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01938       }
01939     } // end while (statusTest == false)
01940   }
01941 
01942 
01944   // compute/return residual M-norms
01945   template <class ScalarType, class MV, class OP>
01946   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01947   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01948     if (Rnorms_current_ == false) {
01949       // Update the residual norms
01950       orthman_->norm(*R_,Rnorms_);
01951       Rnorms_current_ = true;
01952     }
01953     return Rnorms_;
01954   }
01955 
01956   
01958   // compute/return residual 2-norms
01959   template <class ScalarType, class MV, class OP>
01960   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01961   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01962     if (R2norms_current_ == false) {
01963       // Update the residual 2-norms 
01964       MVT::MvNorm(*R_,R2norms_);
01965       R2norms_current_ = true;
01966     }
01967     return R2norms_;
01968   }
01969 
01970 
01971 
01972 
01974   // Check accuracy, orthogonality, and other debugging stuff
01975   // 
01976   // bools specify which tests we want to run (instead of running more than we actually care about)
01977   //
01978   // we don't bother checking the following because they are computed explicitly:
01979   //    H == Prec*R
01980   //   KH == K*H
01981   //
01982   // 
01983   // checkX : X orthonormal
01984   //          orthogonal to auxvecs
01985   // checkMX: check MX == M*X
01986   // checkKX: check KX == K*X
01987   // checkP : if fullortho P orthonormal and orthogonal to X
01988   //          orthogonal to auxvecs
01989   // checkMP: check MP == M*P
01990   // checkKP: check KP == K*P
01991   // checkH : if fullortho H orthonormal and orthogonal to X and P
01992   //          orthogonal to auxvecs
01993   // checkMH: check MH == M*H
01994   // checkR : check R orthogonal to X
01995   // checkQ : check that auxiliary vectors are actually orthonormal
01996   //
01997   // TODO: 
01998   //  add checkTheta 
01999   //
02000   template <class ScalarType, class MV, class OP>
02001   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
02002   {
02003     using std::endl;
02004 
02005     std::stringstream os;
02006     os.precision(2);
02007     os.setf(std::ios::scientific, std::ios::floatfield);
02008     MagnitudeType tmp;
02009 
02010     os << " Debugging checks: iteration " << iter_ << where << endl;
02011 
02012     // X and friends
02013     if (chk.checkX && initialized_) {
02014       tmp = orthman_->orthonormError(*X_);
02015       os << " >> Error in X^H M X == I : " << tmp << endl;
02016       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02017         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
02018         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
02019       }
02020     }
02021     if (chk.checkMX && hasM_ && initialized_) {
02022       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
02023       os << " >> Error in MX == M*X    : " << tmp << endl;
02024     }
02025     if (chk.checkKX && initialized_) {
02026       tmp = Utils::errorEquality(*X_, *KX_, Op_);
02027       os << " >> Error in KX == K*X    : " << tmp << endl;
02028     }
02029 
02030     // P and friends
02031     if (chk.checkP && hasP_ && initialized_) {
02032       if (fullOrtho_) {
02033         tmp = orthman_->orthonormError(*P_);
02034         os << " >> Error in P^H M P == I : " << tmp << endl;
02035         tmp = orthman_->orthogError(*P_,*X_);
02036         os << " >> Error in P^H M X == 0 : " << tmp << endl;
02037       }
02038       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02039         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
02040         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
02041       }
02042     }
02043     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
02044       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
02045       os << " >> Error in MP == M*P    : " << tmp << endl;
02046     }
02047     if (chk.checkKP && hasP_ && initialized_) {
02048       tmp = Utils::errorEquality(*P_, *KP_, Op_);
02049       os << " >> Error in KP == K*P    : " << tmp << endl;
02050     }
02051 
02052     // H and friends
02053     if (chk.checkH && initialized_) {
02054       if (fullOrtho_) {
02055         tmp = orthman_->orthonormError(*H_);
02056         os << " >> Error in H^H M H == I : " << tmp << endl;
02057         tmp = orthman_->orthogError(*H_,*X_);
02058         os << " >> Error in H^H M X == 0 : " << tmp << endl;
02059         if (hasP_) {
02060           tmp = orthman_->orthogError(*H_,*P_);
02061           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02062         }
02063       }
02064       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02065         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02066         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02067       }
02068     }
02069     if (chk.checkMH && hasM_ && initialized_) {
02070       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02071       os << " >> Error in MH == M*H    : " << tmp << endl;
02072     }
02073 
02074     // R: this is not M-orthogonality, but standard euclidean orthogonality
02075     if (chk.checkR && initialized_) {
02076       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02077       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02078       tmp = xTx.normFrobenius();
02079       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02080       double normR = xTx.normFrobenius();
02081       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02082     }
02083 
02084     // Q
02085     if (chk.checkQ) {
02086       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02087         tmp = orthman_->orthonormError(*auxVecs_[i]);
02088         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02089         for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
02090           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02091           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02092         }
02093       }
02094     }
02095 
02096     os << endl;
02097 
02098     return os.str();
02099   }
02100 
02101 
02103   // Print the current status of the solver
02104   template <class ScalarType, class MV, class OP>
02105   void 
02106   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02107   {
02108     using std::endl;
02109 
02110     os.setf(std::ios::scientific, std::ios::floatfield);  
02111     os.precision(6);
02112     os <<endl;
02113     os <<"================================================================================" << endl;
02114     os << endl;
02115     os <<"                              LOBPCG Solver Status" << endl;
02116     os << endl;
02117     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02118     os <<"The number of iterations performed is " << iter_       << endl;
02119     os <<"The current block size is             " << blockSize_  << endl;
02120     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02121     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02122     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02123     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02124 
02125     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02126 
02127     if (initialized_) {
02128       os << endl;
02129       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02130       os << std::setw(20) << "Eigenvalue" 
02131          << std::setw(20) << "Residual(M)"
02132          << std::setw(20) << "Residual(2)"
02133          << endl;
02134       os <<"--------------------------------------------------------------------------------"<<endl;
02135       for (int i=0; i<blockSize_; i++) {
02136         os << std::setw(20) << theta_[i];
02137         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02138         else os << std::setw(20) << "not current";
02139         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02140         else os << std::setw(20) << "not current";
02141         os << endl;
02142       }
02143     }
02144     os <<"================================================================================" << endl;
02145     os << endl;
02146   }
02147 
02149   // are we initialized or not?
02150   template <class ScalarType, class MV, class OP>
02151   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02152     return initialized_; 
02153   }
02154 
02155 
02157   // is P valid or not?
02158   template <class ScalarType, class MV, class OP>
02159   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02160     return hasP_;
02161   }
02162   
02164   // is full orthogonalization enabled or not?
02165   template <class ScalarType, class MV, class OP>
02166   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02167     return(fullOrtho_); 
02168   }
02169 
02170   
02172   // return the current auxilliary vectors
02173   template <class ScalarType, class MV, class OP>
02174   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02175     return auxVecs_;
02176   }
02177 
02179   // return the current block size
02180   template <class ScalarType, class MV, class OP>
02181   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02182     return(blockSize_); 
02183   }
02184 
02186   // return the current eigenproblem
02187   template <class ScalarType, class MV, class OP>
02188   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02189     return(*problem_); 
02190   }
02191 
02192   
02194   // return the max subspace dimension
02195   template <class ScalarType, class MV, class OP>
02196   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02197     return 3*blockSize_;
02198   }
02199 
02201   // return the current subspace dimension
02202   template <class ScalarType, class MV, class OP>
02203   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02204     if (!initialized_) return 0;
02205     return nevLocal_;
02206   }
02207 
02208   
02210   // return the current ritz residual norms
02211   template <class ScalarType, class MV, class OP>
02212   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02213   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02214   {
02215     return this->getRes2Norms();
02216   }
02217 
02218   
02220   // return the current compression indices
02221   template <class ScalarType, class MV, class OP>
02222   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02223     std::vector<int> ret(nevLocal_,0);
02224     return ret;
02225   }
02226 
02227   
02229   // return the current ritz values
02230   template <class ScalarType, class MV, class OP>
02231   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02232     std::vector<Value<ScalarType> > ret(nevLocal_);
02233     for (int i=0; i<nevLocal_; i++) {
02234       ret[i].realpart = theta_[i];
02235       ret[i].imagpart = ZERO;
02236     }
02237     return ret;
02238   }
02239 
02241   // Set a new StatusTest for the solver.
02242   template <class ScalarType, class MV, class OP>
02243   void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
02244     TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
02245         "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
02246     tester_ = test;
02247   }
02248 
02250   // Get the current StatusTest used by the solver.
02251   template <class ScalarType, class MV, class OP>
02252   Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
02253     return tester_;
02254   }
02255   
02257   // return the current ritz vectors
02258   template <class ScalarType, class MV, class OP>
02259   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02260     return X_;
02261   }
02262 
02263   
02265   // reset the iteration counter
02266   template <class ScalarType, class MV, class OP>
02267   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02268     iter_=0; 
02269   }
02270 
02271   
02273   // return the number of iterations
02274   template <class ScalarType, class MV, class OP>
02275   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02276     return(iter_); 
02277   }
02278 
02279   
02281   // return the state
02282   template <class ScalarType, class MV, class OP>
02283   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02284     LOBPCGState<ScalarType,MV> state;
02285     state.V = V_;
02286     state.KV = KV_;
02287     state.X = X_;
02288     state.KX = KX_;
02289     state.P = P_;
02290     state.KP = KP_;
02291     state.H = H_;
02292     state.KH = KH_;
02293     state.R = R_;
02294     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02295     if (hasM_) {
02296       state.MV = MV_;
02297       state.MX = MX_;
02298       state.MP = MP_;
02299       state.MH = MH_;
02300     }
02301     else {
02302       state.MX = Teuchos::null;
02303       state.MP = Teuchos::null;
02304       state.MH = Teuchos::null;
02305     }
02306     return state;
02307   }
02308 
02309 } // end Anasazi namespace
02310 
02311 #endif // ANASAZI_LOBPCG_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends