Anasazi Version of the Day
AnasaziLOBPCG.hpp
Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2004) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 
00034 /*
00035     LOBPCG contains local storage of up to 10*blockSize_ vectors, representing 10 entities
00036       X,H,P,R
00037       KX,KH,KP  (product of K and the above)
00038       MX,MH,MP  (product of M and the above, not allocated if we don't have an M matrix)
00039     If full orthogonalization is enabled, one extra multivector of blockSize_ vectors is required to 
00040     compute the local update of X and P.
00041     
00042     A solver is bound to an eigenproblem at declaration.
00043     Other solver parameters (e.g., block size, auxiliary vectors) can be changed dynamically.
00044     
00045     The orthogonalization manager is used to project away from the auxiliary vectors.
00046     If full orthogonalization is enabled, the orthogonalization manager is also used to construct an M orthonormal basis.
00047     The orthogonalization manager is subclass of MatOrthoManager, which LOBPCG assumes to be defined by the M inner product.
00048     LOBPCG will not work correctly if the orthomanager uses a different inner product.
00049  */
00050 
00051 
00052 #ifndef ANASAZI_LOBPCG_HPP
00053 #define ANASAZI_LOBPCG_HPP
00054 
00055 #include "AnasaziTypes.hpp"
00056 
00057 #include "AnasaziEigensolver.hpp"
00058 #include "AnasaziMultiVecTraits.hpp"
00059 #include "AnasaziOperatorTraits.hpp"
00060 #include "Teuchos_ScalarTraits.hpp"
00061 
00062 #include "AnasaziMatOrthoManager.hpp"
00063 #include "AnasaziSolverUtils.hpp"
00064 
00065 #include "Teuchos_LAPACK.hpp"
00066 #include "Teuchos_BLAS.hpp"
00067 #include "Teuchos_SerialDenseMatrix.hpp"
00068 #include "Teuchos_ParameterList.hpp"
00069 #include "Teuchos_TimeMonitor.hpp"
00070 
00091 namespace Anasazi {
00092 
00094 
00095 
00100   template <class ScalarType, class MultiVector>
00101   struct LOBPCGState {
00103     Teuchos::RCP<const MultiVector> V; 
00105     Teuchos::RCP<const MultiVector> KV; 
00107     Teuchos::RCP<const MultiVector> MV;
00108 
00110     Teuchos::RCP<const MultiVector> X; 
00112     Teuchos::RCP<const MultiVector> KX; 
00114     Teuchos::RCP<const MultiVector> MX;
00115 
00117     Teuchos::RCP<const MultiVector> P; 
00119     Teuchos::RCP<const MultiVector> KP; 
00121     Teuchos::RCP<const MultiVector> MP;
00122 
00127     Teuchos::RCP<const MultiVector> H; 
00129     Teuchos::RCP<const MultiVector> KH; 
00131     Teuchos::RCP<const MultiVector> MH;
00132 
00134     Teuchos::RCP<const MultiVector> R;
00135 
00137     Teuchos::RCP<const std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > T;
00138 
00139     LOBPCGState() : 
00140       V(Teuchos::null),KV(Teuchos::null),MV(Teuchos::null),
00141       X(Teuchos::null),KX(Teuchos::null),MX(Teuchos::null),
00142       P(Teuchos::null),KP(Teuchos::null),MP(Teuchos::null),
00143       H(Teuchos::null),KH(Teuchos::null),MH(Teuchos::null),
00144       R(Teuchos::null),T(Teuchos::null) {};
00145   };
00146 
00148 
00150 
00151 
00165   class LOBPCGRitzFailure : public AnasaziError {public:
00166     LOBPCGRitzFailure(const std::string& what_arg) : AnasaziError(what_arg)
00167     {}};
00168 
00181   class LOBPCGInitFailure : public AnasaziError {public:
00182     LOBPCGInitFailure(const std::string& what_arg) : AnasaziError(what_arg)
00183     {}};
00184 
00201   class LOBPCGOrthoFailure : public AnasaziError {public:
00202     LOBPCGOrthoFailure(const std::string& what_arg) : AnasaziError(what_arg)
00203     {}};
00204 
00206 
00207 
00208   template <class ScalarType, class MV, class OP>
00209   class LOBPCG : public Eigensolver<ScalarType,MV,OP> { 
00210   public:
00211     
00213 
00214     
00222     LOBPCG( const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00223             const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00224             const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00225             const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00226             const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00227             Teuchos::ParameterList &params 
00228           );
00229     
00231     virtual ~LOBPCG() {};
00232 
00234 
00236 
00237 
00264     void iterate();
00265 
00291     void initialize(LOBPCGState<ScalarType,MV> newstate);
00292 
00296     void initialize();
00297 
00317     bool isInitialized() const;
00318 
00329     LOBPCGState<ScalarType,MV> getState() const;
00330 
00332 
00334 
00335 
00337     int getNumIters() const;
00338 
00340     void resetNumIters();
00341 
00349     Teuchos::RCP<const MV> getRitzVectors();
00350 
00356     std::vector<Value<ScalarType> > getRitzValues();
00357 
00365     std::vector<int> getRitzIndex();
00366 
00367 
00373     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getResNorms();
00374 
00375 
00381     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRes2Norms();
00382 
00383 
00391     std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> getRitzRes2Norms();
00392 
00393 
00402     int getCurSubspaceDim() const;
00403 
00407     int getMaxSubspaceDim() const;
00408 
00410 
00412 
00413 
00415     void setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test);
00416 
00418     Teuchos::RCP<StatusTest<ScalarType,MV,OP> > getStatusTest() const;
00419 
00421     const Eigenproblem<ScalarType,MV,OP>& getProblem() const;
00422 
00423 
00432     void setBlockSize(int blockSize);
00433 
00434 
00436     int getBlockSize() const;
00437 
00438 
00450     void setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs);
00451 
00453     Teuchos::Array<Teuchos::RCP<const MV> > getAuxVecs() const;
00454 
00456 
00458 
00459 
00465     void setFullOrtho(bool fullOrtho);
00466 
00468     bool getFullOrtho() const;
00469     
00471     bool hasP();
00472 
00474     
00476 
00477 
00479     void currentStatus(std::ostream &os);
00480 
00482 
00483   private:
00484     //
00485     //
00486     //
00487     void setupViews();
00488     //
00489     // Convenience typedefs
00490     //
00491     typedef SolverUtils<ScalarType,MV,OP> Utils;
00492     typedef MultiVecTraits<ScalarType,MV> MVT;
00493     typedef MultiVecTraitsExt<ScalarType,MV> MVText;
00494     typedef OperatorTraits<ScalarType,MV,OP> OPT;
00495     typedef Teuchos::ScalarTraits<ScalarType> SCT;
00496     typedef typename SCT::magnitudeType MagnitudeType;
00497     const MagnitudeType ONE;  
00498     const MagnitudeType ZERO; 
00499     const MagnitudeType NANVAL;
00500     //
00501     // Internal structs
00502     //
00503     struct CheckList {
00504       bool checkX, checkMX, checkKX;
00505       bool checkH, checkMH;
00506       bool checkP, checkMP, checkKP;
00507       bool checkR, checkQ;
00508       CheckList() : checkX(false),checkMX(false),checkKX(false),
00509                     checkH(false),checkMH(false),
00510                     checkP(false),checkMP(false),checkKP(false),
00511                     checkR(false),checkQ(false) {};
00512     };
00513     //
00514     // Internal methods
00515     //
00516     std::string accuracyCheck(const CheckList &chk, const std::string &where) const;
00517     //
00518     // Classes inputed through constructor that define the eigenproblem to be solved.
00519     //
00520     const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> >     problem_;
00521     const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > sm_;
00522     const Teuchos::RCP<OutputManager<ScalarType> >          om_;
00523     Teuchos::RCP<StatusTest<ScalarType,MV,OP> >       tester_;
00524     const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> >  orthman_;
00525     //
00526     // Information obtained from the eigenproblem
00527     //
00528     Teuchos::RCP<const OP> Op_;
00529     Teuchos::RCP<const OP> MOp_;
00530     Teuchos::RCP<const OP> Prec_;
00531     bool hasM_;
00532     //
00533     // Internal timers
00534     //
00535     Teuchos::RCP<Teuchos::Time> timerOp_, timerMOp_, timerPrec_,
00536                                 timerSort_, 
00537                                 timerLocalProj_, timerDS_,
00538                                 timerLocalUpdate_, timerCompRes_,
00539                                 timerOrtho_, timerInit_;
00540     //
00541     // Counters
00542     //
00543     // Number of operator applications
00544     int count_ApplyOp_, count_ApplyM_, count_ApplyPrec_;
00545 
00546     //
00547     // Algorithmic parameters.
00548     //
00549     // blockSize_ is the solver block size
00550     int blockSize_;
00551     //
00552     // fullOrtho_ dictates whether the orthogonalization procedures specified by Hetmaniuk and Lehoucq should
00553     // be activated (see citations at the top of this file)
00554     bool fullOrtho_;
00555 
00556     //
00557     // Current solver state
00558     //
00559     // initialized_ specifies that the basis vectors have been initialized and the iterate() routine
00560     // is capable of running; _initialize is controlled  by the initialize() member method
00561     // For the implications of the state of initialized_, please see documentation for initialize()
00562     bool initialized_;
00563     //
00564     // nevLocal_ reflects how much of the current basis is valid (0 <= nevLocal_ <= 3*blockSize_)
00565     // this tells us how many of the values in theta_ are valid Ritz values
00566     int nevLocal_;
00567     //
00568     // hasP_ tells us whether there is valid data in P (and KP,MP)
00569     bool hasP_;
00570     //
00571     // State Multivecs
00572     // V_, KV_ MV_  and  R_ are primary pointers to allocated multivectors
00573     Teuchos::RCP<MV> V_, KV_, MV_, R_;
00574     // the rest are multivector views into V_, KV_ and MV_
00575     Teuchos::RCP<MV> X_, KX_, MX_,
00576                      H_, KH_, MH_,
00577                      P_, KP_, MP_;
00578 
00579     //
00580     // if fullOrtho_ == true, then we must produce the following on every iteration:
00581     // [newX newP] = [X H P] [CX;CP]
00582     // the structure of [CX;CP] when using full orthogonalization does not allow us to 
00583     // do this in situ, and R_ does not have enough storage for newX and newP. therefore, 
00584     // we must allocate additional storage for this.
00585     // otherwise, when not using full orthogonalization, the structure
00586     // [newX newP] = [X H P] [CX1  0 ]
00587     //                       [CX2 CP2]  allows us to work using only R as work space
00588     //                       [CX3 CP3] 
00589     Teuchos::RCP<MV> tmpmvec_;        
00590     // 
00591     // auxiliary vectors
00592     Teuchos::Array<Teuchos::RCP<const MV> > auxVecs_;
00593     int numAuxVecs_;
00594     //
00595     // Number of iterations that have been performed.
00596     int iter_;
00597     // 
00598     // Current eigenvalues, residual norms
00599     std::vector<MagnitudeType> theta_, Rnorms_, R2norms_;
00600     // 
00601     // are the residual norms current with the residual?
00602     bool Rnorms_current_, R2norms_current_;
00603 
00604   };
00605 
00606 
00607 
00608 
00610   // Constructor
00611   template <class ScalarType, class MV, class OP>
00612   LOBPCG<ScalarType,MV,OP>::LOBPCG(
00613         const Teuchos::RCP<Eigenproblem<ScalarType,MV,OP> > &problem, 
00614         const Teuchos::RCP<SortManager<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> > &sorter,
00615         const Teuchos::RCP<OutputManager<ScalarType> > &printer,
00616         const Teuchos::RCP<StatusTest<ScalarType,MV,OP> > &tester,
00617         const Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > &ortho,
00618         Teuchos::ParameterList &params
00619         ) :
00620     ONE(Teuchos::ScalarTraits<MagnitudeType>::one()),
00621     ZERO(Teuchos::ScalarTraits<MagnitudeType>::zero()),
00622     NANVAL(Teuchos::ScalarTraits<MagnitudeType>::nan()),
00623     // problem, tools
00624     problem_(problem), 
00625     sm_(sorter),
00626     om_(printer),
00627     tester_(tester),
00628     orthman_(ortho),
00629     // timers, counters
00630 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00631     timerOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Op*x")),
00632     timerMOp_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation M*x")),
00633     timerPrec_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Operation Prec*x")),
00634     timerSort_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Sorting eigenvalues")),
00635     timerLocalProj_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local projection")),
00636     timerDS_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Direct solve")),
00637     timerLocalUpdate_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Local update")),
00638     timerCompRes_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Computing residuals")),
00639     timerOrtho_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Orthogonalization")),
00640     timerInit_(Teuchos::TimeMonitor::getNewTimer("Anasazi: LOBPCG::Initialization")),
00641 #endif
00642     count_ApplyOp_(0),
00643     count_ApplyM_(0),
00644     count_ApplyPrec_(0),
00645     // internal data
00646     blockSize_(0),
00647     fullOrtho_(params.get("Full Ortho", true)),
00648     initialized_(false),
00649     nevLocal_(0),
00650     hasP_(false),
00651     auxVecs_( Teuchos::Array<Teuchos::RCP<const MV> >(0) ), 
00652     numAuxVecs_(0),
00653     iter_(0),
00654     Rnorms_current_(false),
00655     R2norms_current_(false)
00656   {     
00657     TEUCHOS_TEST_FOR_EXCEPTION(problem_ == Teuchos::null,std::invalid_argument,
00658                        "Anasazi::LOBPCG::constructor: user passed null problem pointer.");
00659     TEUCHOS_TEST_FOR_EXCEPTION(sm_ == Teuchos::null,std::invalid_argument,
00660                        "Anasazi::LOBPCG::constructor: user passed null sort manager pointer.");
00661     TEUCHOS_TEST_FOR_EXCEPTION(om_ == Teuchos::null,std::invalid_argument,
00662                        "Anasazi::LOBPCG::constructor: user passed null output manager pointer.");
00663     TEUCHOS_TEST_FOR_EXCEPTION(tester_ == Teuchos::null,std::invalid_argument,
00664                        "Anasazi::LOBPCG::constructor: user passed null status test pointer.");
00665     TEUCHOS_TEST_FOR_EXCEPTION(orthman_ == Teuchos::null,std::invalid_argument,
00666                        "Anasazi::LOBPCG::constructor: user passed null orthogonalization manager pointer.");
00667     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isProblemSet() == false, std::invalid_argument,
00668                        "Anasazi::LOBPCG::constructor: problem is not set.");
00669     TEUCHOS_TEST_FOR_EXCEPTION(problem_->isHermitian() == false, std::invalid_argument,
00670                        "Anasazi::LOBPCG::constructor: problem is not Hermitian; LOBPCG requires Hermitian problem.");
00671 
00672     // get the problem operators
00673     Op_   = problem_->getOperator();
00674     TEUCHOS_TEST_FOR_EXCEPTION(Op_ == Teuchos::null, std::invalid_argument,
00675                        "Anasazi::LOBPCG::constructor: problem provides no operator.");
00676     MOp_  = problem_->getM();
00677     Prec_ = problem_->getPrec();
00678     hasM_ = (MOp_ != Teuchos::null);
00679 
00680     // set the block size and allocate data
00681     int bs = params.get("Block Size", problem_->getNEV());
00682     setBlockSize(bs);
00683   }
00684 
00685 
00687   // Set the block size and make necessary adjustments.
00688   template <class ScalarType, class MV, class OP>
00689   void LOBPCG<ScalarType,MV,OP>::setBlockSize (int newBS) 
00690   {
00691     // time spent here counts towards timerInit_
00692 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00693     Teuchos::TimeMonitor lcltimer( *timerInit_ );
00694 #endif
00695 
00696     // This routine only allocates space; it doesn't not perform any computation
00697     // if size is decreased, take the first newBS vectors of all and leave state as is
00698     // otherwise, grow/allocate space and set solver to unitialized
00699 
00700     Teuchos::RCP<const MV> tmp;
00701     // grab some Multivector to Clone
00702     // in practice, getInitVec() should always provide this, but it is possible to use a 
00703     // Eigenproblem with nothing in getInitVec() by manually initializing with initialize(); 
00704     // in case of that strange scenario, we will try to Clone from R_ because it is smaller 
00705     // than V_, and we don't want to keep V_ around longer than necessary
00706     if (blockSize_ > 0) {
00707       tmp = R_;
00708     }
00709     else {
00710       tmp = problem_->getInitVec();
00711       TEUCHOS_TEST_FOR_EXCEPTION(tmp == Teuchos::null,std::logic_error,
00712                          "Anasazi::LOBPCG::setBlockSize(): eigenproblem did not specify initial vectors to clone from.");
00713     }
00714     
00715     TEUCHOS_TEST_FOR_EXCEPTION(newBS <= 0 || static_cast<ptrdiff_t>(newBS) > MVText::GetGlobalLength(*tmp), 
00716                          std::invalid_argument, "Anasazi::LOBPCG::setBlockSize(): block size must be strictly positive.");
00717     if (newBS == blockSize_) {
00718       // do nothing
00719       return;
00720     }
00721     else if (newBS < blockSize_ && initialized_) {
00722       //
00723       // shrink vectors
00724 
00725       // release views so we can modify the bases
00726       X_ = Teuchos::null;
00727       KX_ = Teuchos::null;
00728       MX_ = Teuchos::null;
00729       H_ = Teuchos::null;
00730       KH_ = Teuchos::null;
00731       MH_ = Teuchos::null;
00732       P_ = Teuchos::null;
00733       KP_ = Teuchos::null;
00734       MP_ = Teuchos::null;
00735 
00736       // make new indices vectors
00737       std::vector<int> newind(newBS), oldind(newBS);
00738       for (int i=0; i<newBS; i++) {
00739         newind[i] = i;
00740         oldind[i] = i;
00741       }
00742 
00743       Teuchos::RCP<MV> newV, newMV, newKV, newR;
00744       Teuchos::RCP<const MV> src;
00745       // allocate R and newV
00746       newR = MVT::Clone(*tmp,newBS);
00747       newV = MVT::Clone(*tmp,newBS*3);
00748       newKV = MVT::Clone(*tmp,newBS*3);
00749       if (hasM_) {
00750         newMV = MVT::Clone(*tmp,newBS*3);
00751       }
00752 
00753       //
00754       // if we are initialized, we want to pull the data from V_ into newV:
00755       //           bs  |  bs  | bs 
00756       // newV  = [newX | **** |newP ]
00757       // newKV = [newKX| **** |newKP]
00758       // newMV = [newMX| **** |newMP]
00759       // where 
00760       //          oldbs   |  oldbs  |   oldbs   
00761       //  V_ = [newX  *** | ******* | newP  ***]
00762       // KV_ = [newKX *** | ******* | newKP ***]
00763       // MV_ = [newMX *** | ******* | newMP ***]
00764       //
00765       // we don't care to copy the data corresponding to H
00766       // we will not copy the M data if !hasM_, because it doesn't exist
00767       //
00768 
00769       // these are shrink operations which preserve their data
00770       theta_.resize(3*newBS);
00771       Rnorms_.resize(newBS);
00772       R2norms_.resize(newBS);
00773 
00774       // copy residual vectors: oldind,newind currently contains [0,...,newBS-1]
00775       src = MVT::CloneView(*R_,newind);
00776       MVT::SetBlock(*src,newind,*newR);
00777       // free old memory and point to new memory
00778       R_ = newR;
00779 
00780       // copy in order: newX newKX newMX, then newP newKP newMP
00781       // for  X: [0,bs-1] <-- [0,bs-1] 
00782       src = MVT::CloneView(*V_,oldind);
00783       MVT::SetBlock(*src,newind,*newV);
00784       src = MVT::CloneView(*KV_,oldind);
00785       MVT::SetBlock(*src,newind,*newKV);
00786       if (hasM_) {
00787         src = MVT::CloneView(*MV_,oldind);
00788         MVT::SetBlock(*src,newind,*newMV);
00789       }
00790       // for  P: [2*bs, 3*bs-1] <-- [2*oldbs, 2*oldbs+bs-1] 
00791       for (int i=0; i<newBS; i++) {
00792         newind[i] += 2*newBS;
00793         oldind[i] += 2*blockSize_;
00794       }
00795       src = MVT::CloneView(*V_,oldind);
00796       MVT::SetBlock(*src,newind,*newV);
00797       src = MVT::CloneView(*KV_,oldind);
00798       MVT::SetBlock(*src,newind,*newKV);
00799       if (hasM_) {
00800         src = MVT::CloneView(*MV_,oldind);
00801         MVT::SetBlock(*src,newind,*newMV);
00802       }
00803 
00804       // release temp view
00805       src = Teuchos::null;
00806 
00807       // release old allocations and point at new memory
00808       V_ = newV;
00809       KV_ = newKV;
00810       if (hasM_) {
00811         MV_ = newMV;
00812       }
00813       else {
00814         MV_ = V_;
00815       }
00816     }
00817     else {  
00818       // newBS > blockSize_  or  not initialized
00819       // this is also the scenario for our initial call to setBlockSize(), in the constructor
00820       initialized_ = false;
00821       hasP_ = false;
00822 
00823       // release views
00824       X_ = Teuchos::null;
00825       KX_ = Teuchos::null;
00826       MX_ = Teuchos::null;
00827       H_ = Teuchos::null;
00828       KH_ = Teuchos::null;
00829       MH_ = Teuchos::null;
00830       P_ = Teuchos::null;
00831       KP_ = Teuchos::null;
00832       MP_ = Teuchos::null;
00833 
00834       // free allocated storage
00835       R_ = Teuchos::null;
00836       V_ = Teuchos::null;
00837 
00838       // allocate scalar vectors
00839       theta_.resize(3*newBS,NANVAL);
00840       Rnorms_.resize(newBS,NANVAL);
00841       R2norms_.resize(newBS,NANVAL);
00842       
00843       // clone multivectors off of tmp
00844       R_ = MVT::Clone(*tmp,newBS);
00845       V_ = MVT::Clone(*tmp,newBS*3);
00846       KV_ = MVT::Clone(*tmp,newBS*3);
00847       if (hasM_) {
00848         MV_ = MVT::Clone(*tmp,newBS*3);
00849       }
00850       else {
00851         MV_ = V_;
00852       }
00853     }
00854 
00855     // allocate tmp space
00856     tmpmvec_ = Teuchos::null;
00857     if (fullOrtho_) {
00858       tmpmvec_ = MVT::Clone(*tmp,newBS);
00859     }
00860 
00861     // set new block size
00862     blockSize_ = newBS;
00863 
00864     // setup new views
00865     setupViews();
00866   }
00867 
00868 
00870   // Setup views into V,KV,MV
00871   template <class ScalarType, class MV, class OP>
00872   void LOBPCG<ScalarType,MV,OP>::setupViews() 
00873   {
00874     std::vector<int> ind(blockSize_);
00875 
00876     for (int i=0; i<blockSize_; i++) {
00877       ind[i] = i;
00878     }
00879     X_  = MVT::CloneViewNonConst(*V_,ind);
00880     KX_ = MVT::CloneViewNonConst(*KV_,ind);
00881     if (hasM_) {
00882       MX_ = MVT::CloneViewNonConst(*MV_,ind);
00883     }
00884     else {
00885       MX_ = X_;
00886     }
00887 
00888     for (int i=0; i<blockSize_; i++) {
00889       ind[i] += blockSize_;
00890     }
00891     H_  = MVT::CloneViewNonConst(*V_,ind);
00892     KH_ = MVT::CloneViewNonConst(*KV_,ind);
00893     if (hasM_) {
00894       MH_ = MVT::CloneViewNonConst(*MV_,ind);
00895     }
00896     else {
00897       MH_ = H_;
00898     }
00899 
00900     for (int i=0; i<blockSize_; i++) {
00901       ind[i] += blockSize_;
00902     }
00903     P_  = MVT::CloneViewNonConst(*V_,ind);
00904     KP_ = MVT::CloneViewNonConst(*KV_,ind);
00905     if (hasM_) {
00906       MP_ = MVT::CloneViewNonConst(*MV_,ind);
00907     }
00908     else {
00909       MP_ = P_;
00910     }
00911   }
00912 
00913 
00915   // Set the auxiliary vectors
00916   template <class ScalarType, class MV, class OP>
00917   void LOBPCG<ScalarType,MV,OP>::setAuxVecs(const Teuchos::Array<Teuchos::RCP<const MV> > &auxvecs) {
00918     typedef typename Teuchos::Array<Teuchos::RCP<const MV> >::iterator tarcpmv;
00919 
00920     // set new auxiliary vectors
00921     auxVecs_ = auxvecs;
00922     
00923     numAuxVecs_ = 0;
00924     for (tarcpmv i=auxVecs_.begin(); i != auxVecs_.end(); i++) {
00925       numAuxVecs_ += MVT::GetNumberVecs(**i);
00926     }
00927     
00928     // If the solver has been initialized, X and P are not necessarily orthogonal to new auxiliary vectors
00929     if (numAuxVecs_ > 0 && initialized_) {
00930       initialized_ = false;
00931       hasP_ = false;
00932     }
00933 
00934     if (om_->isVerbosity( Debug ) ) {
00935       // Check almost everything here
00936       CheckList chk;
00937       chk.checkQ = true;
00938       om_->print( Debug, accuracyCheck(chk, ": in setAuxVecs()") );
00939     }
00940   }
00941 
00942 
00944   /* Initialize the state of the solver
00945    * 
00946    * POST-CONDITIONS:
00947    *
00948    * initialized_ == true
00949    * X is orthonormal, orthogonal to auxVecs_
00950    * KX = Op*X
00951    * MX = M*X if hasM_
00952    * theta_ contains Ritz values of X
00953    * R = KX - MX*diag(theta_)
00954    * if hasP() == true,
00955    *   P orthogonal to auxVecs_
00956    *   if fullOrtho_ == true,
00957    *     P orthonormal and orthogonal to X
00958    *   KP = Op*P
00959    *   MP = M*P
00960    */
00961   template <class ScalarType, class MV, class OP>
00962   void LOBPCG<ScalarType,MV,OP>::initialize(LOBPCGState<ScalarType,MV> newstate)
00963   {
00964     // NOTE: memory has been allocated by setBlockSize(). Use SetBlock below; do not Clone
00965     // NOTE: Overall time spent in this routine is counted to timerInit_; portions will also be counted towards other primitives
00966 
00967 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
00968     Teuchos::TimeMonitor inittimer( *timerInit_ );
00969 #endif
00970 
00971     std::vector<int> bsind(blockSize_);
00972     for (int i=0; i<blockSize_; i++) bsind[i] = i;
00973 
00974     // in LOBPCG, X (the subspace iterate) is primary
00975     // the order of dependence follows like so.
00976     // --init->                 X
00977     //    --op apply->          MX,KX
00978     //       --ritz analysis->  theta
00979     //          --optional->    P,MP,KP
00980     // 
00981     // if the user specifies all data for a level, we will accept it.
00982     // otherwise, we will generate the whole level, and all subsequent levels.
00983     //
00984     // the data members are ordered based on dependence, and the levels are
00985     // partitioned according to the amount of work required to produce the
00986     // items in a level.
00987     //
00988     // inconsitent multivectors widths and lengths will not be tolerated, and
00989     // will be treated with exceptions.
00990 
00991     // set up X, KX, MX: get them from "state" if user specified them
00992 
00993     //----------------------------------------
00994     // set up X, MX, KX
00995     //----------------------------------------
00996     if (newstate.X != Teuchos::null) {
00997       TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.X) != MVText::GetGlobalLength(*X_),
00998                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.X not correct." );
00999       // newstate.X must have blockSize_ vectors; any more will be ignored
01000       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.X) < blockSize_,
01001                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.X must have at least block size vectors.");
01002 
01003       // put X data in X_
01004       MVT::SetBlock(*newstate.X,bsind,*X_);
01005 
01006       // put MX data in MX_
01007       if (hasM_) {
01008         if (newstate.MX != Teuchos::null) {
01009           TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.MX) != MVText::GetGlobalLength(*MX_),
01010                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MX not correct." );
01011           // newstate.MX must have blockSize_ vectors; any more will be ignored
01012           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MX) < blockSize_,
01013                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MX must have at least block size vectors.");
01014           MVT::SetBlock(*newstate.MX,bsind,*MX_);
01015         }
01016         else {
01017           // user didn't specify MX, compute it
01018           {
01019 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01020             Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01021 #endif
01022             OPT::Apply(*MOp_,*X_,*MX_);
01023             count_ApplyM_ += blockSize_;
01024           }
01025           // we generated MX; we will generate R as well
01026           newstate.R = Teuchos::null;
01027         }
01028       }
01029   
01030       // put data in KX
01031       if (newstate.KX != Teuchos::null) {
01032         TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.KX) != MVText::GetGlobalLength(*KX_),
01033                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KX not correct." );
01034         // newstate.KX must have blockSize_ vectors; any more will be ignored
01035         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KX) < blockSize_,
01036                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KX must have at least block size vectors.");
01037         MVT::SetBlock(*newstate.KX,bsind,*KX_);
01038       }
01039       else {
01040         // user didn't specify KX, compute it
01041         {
01042 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01043           Teuchos::TimeMonitor lcltimer( *timerOp_ );
01044 #endif
01045           OPT::Apply(*Op_,*X_,*KX_);
01046           count_ApplyOp_ += blockSize_;
01047         }
01048         // we generated KX; we will generate R as well
01049         newstate.R = Teuchos::null;
01050       }
01051     }
01052     else {
01053       // user did not specify X
01054       // we will initialize X, compute KX and MX, and compute R
01055       //
01056       // clear state so we won't use any data from it below
01057       newstate.P  = Teuchos::null;
01058       newstate.KP = Teuchos::null;
01059       newstate.MP = Teuchos::null;
01060       newstate.R  = Teuchos::null;
01061       newstate.T  = Teuchos::null;
01062 
01063       // generate a basis and projectAndNormalize
01064       Teuchos::RCP<const MV> ivec = problem_->getInitVec();
01065       TEUCHOS_TEST_FOR_EXCEPTION(ivec == Teuchos::null,std::logic_error,
01066                          "Anasazi::LOBPCG::initialize(): Eigenproblem did not specify initial vectors to clone from.");
01067 
01068       int initSize = MVT::GetNumberVecs(*ivec);
01069       if (initSize > blockSize_) {
01070         // we need only the first blockSize_ vectors from ivec; get a view of them
01071         initSize = blockSize_;
01072         std::vector<int> ind(blockSize_);
01073         for (int i=0; i<blockSize_; i++) ind[i] = i;
01074         ivec = MVT::CloneView(*ivec,ind);
01075       }
01076 
01077       // assign ivec to first part of X_ 
01078       if (initSize > 0) {
01079         std::vector<int> ind(initSize);
01080         for (int i=0; i<initSize; i++) ind[i] = i;
01081         MVT::SetBlock(*ivec,ind,*X_);
01082       }
01083       // fill the rest of X_ with random
01084       if (blockSize_ > initSize) {
01085         std::vector<int> ind(blockSize_ - initSize);
01086         for (int i=0; i<blockSize_ - initSize; i++) ind[i] = initSize + i;
01087         Teuchos::RCP<MV> rX = MVT::CloneViewNonConst(*X_,ind);
01088         MVT::MvRandom(*rX);
01089         rX = Teuchos::null;
01090       }
01091 
01092       // put data in MX
01093       if (hasM_) {
01094 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01095         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01096 #endif
01097         OPT::Apply(*MOp_,*X_,*MX_);
01098         count_ApplyM_ += blockSize_;
01099       }
01100   
01101       // remove auxVecs from X_ and normalize it
01102       if (numAuxVecs_ > 0) {
01103 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01104         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01105 #endif
01106         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummy;
01107         int rank = orthman_->projectAndNormalizeMat(*X_,auxVecs_,dummy,Teuchos::null,MX_);
01108         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01109                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01110       }
01111       else {
01112 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01113         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01114 #endif
01115         int rank = orthman_->normalizeMat(*X_,Teuchos::null,MX_);
01116         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_, LOBPCGInitFailure,
01117                            "Anasazi::LOBPCG::initialize(): Couldn't generate initial basis of full rank.");
01118       }
01119 
01120       // put data in KX
01121       {
01122 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01123         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01124 #endif
01125         OPT::Apply(*Op_,*X_,*KX_);
01126         count_ApplyOp_ += blockSize_;
01127       }
01128     } // end if (newstate.X != Teuchos::null)
01129 
01130 
01131     //----------------------------------------
01132     // set up Ritz values
01133     //----------------------------------------
01134     theta_.resize(3*blockSize_,NANVAL);
01135     if (newstate.T != Teuchos::null) {
01136       TEUCHOS_TEST_FOR_EXCEPTION( (signed int)(newstate.T->size()) < blockSize_,
01137                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.T must contain at least block size Ritz values.");
01138       for (int i=0; i<blockSize_; i++) {
01139         theta_[i] = (*newstate.T)[i];
01140       }
01141       nevLocal_ = blockSize_;
01142     }
01143     else {
01144       // get ritz vecs/vals
01145       Teuchos::SerialDenseMatrix<int,ScalarType> KK(blockSize_,blockSize_),
01146                                                  MM(blockSize_,blockSize_),
01147                                                   S(blockSize_,blockSize_);
01148       {
01149 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01150         Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01151 #endif
01152         // project K
01153         MVT::MvTransMv(ONE,*X_,*KX_,KK);
01154         // project M
01155         MVT::MvTransMv(ONE,*X_,*MX_,MM);
01156         nevLocal_ = blockSize_;
01157       }
01158 
01159       // solve the projected problem
01160       {
01161 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01162         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01163 #endif
01164         Utils::directSolver(blockSize_, KK, Teuchos::rcpFromRef(MM), S, theta_, nevLocal_, 1);
01165         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != blockSize_,LOBPCGInitFailure,
01166                            "Anasazi::LOBPCG::initialize(): Initial Ritz analysis did not produce enough Ritz pairs to initialize algorithm.");
01167       }
01168 
01169       // We only have blockSize_ ritz pairs, ergo we do not need to select.
01170       // However, we still require them to be ordered correctly
01171       {
01172 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01173         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01174 #endif
01175 
01176         std::vector<int> order(blockSize_);
01177         // 
01178         // sort the first blockSize_ values in theta_
01179         sm_->sort(theta_, Teuchos::rcpFromRef(order), blockSize_);   // don't catch exception
01180         //
01181         // apply the same ordering to the primitive ritz vectors
01182         Utils::permuteVectors(order,S);
01183       }
01184 
01185       // update the solution, use R for storage
01186       {
01187 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01188         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01189 #endif
01190         // X <- X*S
01191         MVT::MvAddMv( ONE, *X_, ZERO, *X_, *R_ );        
01192         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *X_ );
01193         // KX <- KX*S
01194         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );        
01195         MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *KX_ );
01196         if (hasM_) {
01197           // MX <- MX*S
01198           MVT::MvAddMv( ONE, *MX_, ZERO, *MX_, *R_ );        
01199           MVT::MvTimesMatAddMv( ONE, *R_, S, ZERO, *MX_ );
01200         }
01201       }
01202     }
01203 
01204     //----------------------------------------
01205     // compute R
01206     //----------------------------------------
01207     if (newstate.R != Teuchos::null) {
01208       TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.R) != MVText::GetGlobalLength(*R_),
01209                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.R not correct." );
01210       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.R) < blockSize_,
01211                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.R must have blockSize number of vectors." );
01212       MVT::SetBlock(*newstate.R,bsind,*R_);
01213     }
01214     else {
01215 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01216       Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01217 #endif
01218       // form R <- KX - MX*T
01219       MVT::MvAddMv(ZERO,*KX_,ONE,*KX_,*R_);
01220       Teuchos::SerialDenseMatrix<int,ScalarType> T(blockSize_,blockSize_);
01221       for (int i=0; i<blockSize_; i++) T(i,i) = theta_[i];
01222       MVT::MvTimesMatAddMv(-ONE,*MX_,T,ONE,*R_);
01223     }
01224 
01225     // R has been updated; mark the norms as out-of-date
01226     Rnorms_current_ = false;
01227     R2norms_current_ = false;
01228   
01229     // put data in P,KP,MP: P is not used to set theta
01230     if (newstate.P != Teuchos::null) {
01231       TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.P) < blockSize_ ,
01232                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.P must have blockSize number of vectors." );
01233       TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.P) != MVText::GetGlobalLength(*P_),
01234                           std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.P not correct." );
01235       hasP_ = true;
01236 
01237       // set P_
01238       MVT::SetBlock(*newstate.P,bsind,*P_);
01239 
01240       // set/compute KP_
01241       if (newstate.KP != Teuchos::null) {
01242         TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.KP) < blockSize_,
01243                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.KP must have blockSize number of vectors." );
01244         TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.KP) != MVText::GetGlobalLength(*KP_),
01245                             std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.KP not correct." );
01246         MVT::SetBlock(*newstate.KP,bsind,*KP_);
01247       }
01248       else {
01249 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01250         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01251 #endif
01252         OPT::Apply(*Op_,*P_,*KP_);
01253         count_ApplyOp_ += blockSize_;
01254       }
01255 
01256       // set/compute MP_
01257       if (hasM_) {
01258         if (newstate.MP != Teuchos::null) {
01259           TEUCHOS_TEST_FOR_EXCEPTION( MVT::GetNumberVecs(*newstate.MP) < blockSize_,
01260                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): newstate.MP must have blockSize number of vectors." );
01261           TEUCHOS_TEST_FOR_EXCEPTION( MVText::GetGlobalLength(*newstate.MP) != MVText::GetGlobalLength(*MP_),
01262                               std::invalid_argument, "Anasazi::LOBPCG::initialize(newstate): vector length of newstate.MP not correct." );
01263           MVT::SetBlock(*newstate.MP,bsind,*MP_);
01264         }
01265         else {
01266 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01267           Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01268 #endif
01269           OPT::Apply(*MOp_,*P_,*MP_);
01270           count_ApplyM_ += blockSize_;
01271         }
01272       }
01273     }
01274     else {
01275       hasP_ = false;
01276     }
01277 
01278     // finally, we are initialized
01279     initialized_ = true;
01280 
01281     if (om_->isVerbosity( Debug ) ) {
01282       // Check almost everything here
01283       CheckList chk;
01284       chk.checkX = true;
01285       chk.checkKX = true;
01286       chk.checkMX = true;
01287       chk.checkP = true;
01288       chk.checkKP = true;
01289       chk.checkMP = true;
01290       chk.checkR = true;
01291       chk.checkQ = true;
01292       om_->print( Debug, accuracyCheck(chk, ": after initialize()") );
01293     }
01294 
01295   }
01296 
01297   template <class ScalarType, class MV, class OP>
01298   void LOBPCG<ScalarType,MV,OP>::initialize()
01299   {
01300     LOBPCGState<ScalarType,MV> empty;
01301     initialize(empty);
01302   }
01303 
01304 
01306   // Instruct the solver to use full orthogonalization
01307   template <class ScalarType, class MV, class OP>
01308   void LOBPCG<ScalarType,MV,OP>::setFullOrtho (bool fullOrtho)
01309   {
01310     if ( fullOrtho_ == true || initialized_ == false || fullOrtho == fullOrtho_ ) {
01311       // state is already orthogonalized or solver is not initialized
01312       fullOrtho_ = fullOrtho;
01313     }
01314     else {
01315       // solver is initialized, state is not fully orthogonalized, and user has requested full orthogonalization
01316       // ergo, we must throw away data in P
01317       fullOrtho_ = true;
01318       hasP_ = false;
01319     }
01320 
01321     // the user has called setFullOrtho, so the class has been instantiated
01322     // ergo, the data has already been allocated, i.e., setBlockSize() has been called
01323     // if it is already allocated, it should be the proper size
01324     if (fullOrtho_ && tmpmvec_ == Teuchos::null) {
01325       // allocated the workspace
01326       tmpmvec_ = MVT::Clone(*X_,blockSize_);
01327     }
01328     else if (fullOrtho_==false) {
01329       // free the workspace
01330       tmpmvec_ = Teuchos::null;
01331     }
01332   }
01333 
01334 
01336   // Perform LOBPCG iterations until the StatusTest tells us to stop.
01337   template <class ScalarType, class MV, class OP>
01338   void LOBPCG<ScalarType,MV,OP>::iterate () 
01339   {
01340     //
01341     // Allocate/initialize data structures
01342     //
01343     if (initialized_ == false) {
01344       initialize();
01345     }
01346 
01347     //
01348     // Miscellaneous definitions
01349     const int oneBlock    =   blockSize_;
01350     const int twoBlocks   = 2*blockSize_;
01351     const int threeBlocks = 3*blockSize_;
01352 
01353     std::vector<int> indblock1(blockSize_), indblock2(blockSize_), indblock3(blockSize_);
01354     for (int i=0; i<blockSize_; i++) {
01355       indblock1[i] = i;
01356       indblock2[i] = i +  blockSize_;
01357       indblock3[i] = i + 2*blockSize_;
01358     }
01359 
01360     //
01361     // Define dense projected/local matrices
01362     //   KK = Local stiffness matrix               (size: 3*blockSize_ x 3*blockSize_)
01363     //   MM = Local mass matrix                    (size: 3*blockSize_ x 3*blockSize_)
01364     //    S = Local eigenvectors                   (size: 3*blockSize_ x 3*blockSize_)
01365     Teuchos::SerialDenseMatrix<int,ScalarType> KK( threeBlocks, threeBlocks ), 
01366                                                MM( threeBlocks, threeBlocks ),
01367                                                 S( threeBlocks, threeBlocks );
01368 
01369     while (tester_->checkStatus(this) != Passed) {
01370 
01371       // Print information on current status
01372       if (om_->isVerbosity(Debug)) {
01373         currentStatus( om_->stream(Debug) );
01374       }
01375       else if (om_->isVerbosity(IterationDetails)) {
01376         currentStatus( om_->stream(IterationDetails) );
01377       }
01378 
01379       // increment iteration counter
01380       iter_++;
01381 
01382       // Apply the preconditioner on the residuals: H <- Prec*R
01383       if (Prec_ != Teuchos::null) {
01384 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01385         Teuchos::TimeMonitor lcltimer( *timerPrec_ );
01386 #endif
01387         OPT::Apply( *Prec_, *R_, *H_ );   // don't catch the exception
01388         count_ApplyPrec_ += blockSize_;
01389       }
01390       else {
01391         MVT::MvAddMv(ONE,*R_,ZERO,*R_,*H_);
01392       }
01393 
01394       // Apply the mass matrix on H
01395       if (hasM_) {
01396 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01397         Teuchos::TimeMonitor lcltimer( *timerMOp_ );
01398 #endif
01399         OPT::Apply( *MOp_, *H_, *MH_);    // don't catch the exception
01400         count_ApplyM_ += blockSize_;
01401       }
01402 
01403       // orthogonalize H against the auxiliary vectors
01404       // optionally: orthogonalize H against X and P ([X P] is already orthonormal)
01405       Teuchos::Array<Teuchos::RCP<const MV> > Q;
01406       Q = auxVecs_;
01407       if (fullOrtho_) {
01408         // X and P are not contiguous, so there is not much point in putting them under 
01409         // a single multivector view
01410         Q.push_back(X_);
01411         if (hasP_) {
01412           Q.push_back(P_);
01413         }
01414       }
01415       {
01416 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01417         Teuchos::TimeMonitor lcltimer( *timerOrtho_ );
01418 #endif
01419         Teuchos::Array<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > > dummyC = 
01420           Teuchos::tuple<Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > >(Teuchos::null);
01421         int rank = orthman_->projectAndNormalizeMat(*H_,Q,dummyC,Teuchos::null,MH_);
01422         // our views are currently in place; it is safe to throw an exception
01423         TEUCHOS_TEST_FOR_EXCEPTION(rank != blockSize_,LOBPCGOrthoFailure,
01424                            "Anasazi::LOBPCG::iterate(): unable to compute orthonormal basis for H.");
01425       }
01426 
01427       if (om_->isVerbosity( Debug ) ) {
01428         CheckList chk;
01429         chk.checkH = true;
01430         chk.checkMH = true;
01431         om_->print( Debug, accuracyCheck(chk, ": after ortho H") );
01432       }
01433       else if (om_->isVerbosity( OrthoDetails ) ) {
01434         CheckList chk;
01435         chk.checkH = true;
01436         chk.checkMH = true;
01437         om_->print( OrthoDetails, accuracyCheck(chk,": after ortho H") );
01438       }
01439 
01440       // Apply the stiffness matrix to H
01441       {
01442 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01443         Teuchos::TimeMonitor lcltimer( *timerOp_ );
01444 #endif
01445         OPT::Apply( *Op_, *H_, *KH_);   // don't catch the exception
01446         count_ApplyOp_ += blockSize_;
01447       }
01448 
01449       if (hasP_) {
01450         nevLocal_ = threeBlocks;
01451       }
01452       else {
01453         nevLocal_ = twoBlocks;
01454       }
01455 
01456       //
01457       // we need bases: [X H P] and [H P] (only need the latter if fullOrtho == false)
01458       // we need to perform the following operations:
01459       //    X' [KX KH KP]
01460       //    X' [MX MH MP]
01461       //    H' [KH KP]
01462       //    H' [MH MP]
01463       //    P' [KP]
01464       //    P' [MP]
01465       //    [X H P] CX
01466       //    [X H P] CP    if  fullOrtho
01467       //      [H P] CP    if !fullOrtho
01468       //
01469       // since M[X H P] is potentially the same memory as [X H P], and 
01470       // because we are not allowed to have overlapping non-const views of 
01471       // a multivector, we will now abandon our non-const views in favor of 
01472       // const views
01473       //
01474       X_ = Teuchos::null;
01475       KX_ = Teuchos::null;
01476       MX_ = Teuchos::null;
01477       H_ = Teuchos::null;
01478       KH_ = Teuchos::null;
01479       MH_ = Teuchos::null;
01480       P_ = Teuchos::null;
01481       KP_ = Teuchos::null;
01482       MP_ = Teuchos::null;
01483       Teuchos::RCP<const MV> cX, cH, cXHP, cHP, cK_XHP, cK_HP, cM_XHP, cM_HP, cP, cK_P, cM_P;
01484       {
01485         cX = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock1);
01486         cH = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock2);
01487 
01488         std::vector<int> indXHP(nevLocal_);
01489         for (int i=0; i<nevLocal_; i++) {
01490           indXHP[i] = i;
01491         }
01492         cXHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indXHP);
01493         cK_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indXHP);
01494         if (hasM_) {
01495           cM_XHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indXHP);
01496         }
01497         else {
01498           cM_XHP = cXHP;
01499         }
01500 
01501         std::vector<int> indHP(nevLocal_-blockSize_);
01502         for (int i=blockSize_; i<nevLocal_; i++) {
01503           indHP[i-blockSize_] = i;
01504         }
01505         cHP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indHP);
01506         cK_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indHP);
01507         if (hasM_) {
01508           cM_HP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indHP);
01509         }
01510         else {
01511           cM_HP = cHP;
01512         }
01513 
01514         if (nevLocal_ == threeBlocks) {
01515           cP = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(V_),indblock3);
01516           cK_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(KV_),indblock3);
01517           if (hasM_) {
01518             cM_P = MVT::CloneView(*Teuchos::rcp_implicit_cast<const MV>(MV_),indblock3);
01519           }
01520           else {
01521             cM_P = cP;
01522           }
01523         }
01524       }
01525 
01526       //
01527       //----------------------------------------
01528       // Form "local" mass and stiffness matrices
01529       //----------------------------------------
01530       {
01531         // We will form only the block upper triangular part of 
01532         // [X H P]' K [X H P]  and  [X H P]' M [X H P]
01533         // Get the necessary views into KK and MM:
01534         //      [--K1--]        [--M1--]
01535         // KK = [  -K2-]   MM = [  -M2-]
01536         //      [    K3]        [    M3]
01537         // 
01538         // It is okay to declare a zero-area view of a Teuchos::SerialDenseMatrix
01539         //
01540         Teuchos::SerialDenseMatrix<int,ScalarType> 
01541           K1(Teuchos::View,KK,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01542           K2(Teuchos::View,KK,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01543           K3(Teuchos::View,KK,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_),
01544           M1(Teuchos::View,MM,blockSize_,nevLocal_             ,0*blockSize_,0*blockSize_),
01545           M2(Teuchos::View,MM,blockSize_,nevLocal_-1*blockSize_,1*blockSize_,1*blockSize_),
01546           M3(Teuchos::View,MM,blockSize_,nevLocal_-2*blockSize_,2*blockSize_,2*blockSize_);
01547         {
01548 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01549           Teuchos::TimeMonitor lcltimer( *timerLocalProj_ );
01550 #endif
01551           MVT::MvTransMv( ONE, *cX, *cK_XHP, K1 );
01552           MVT::MvTransMv( ONE, *cX, *cM_XHP, M1 );
01553           MVT::MvTransMv( ONE, *cH, *cK_HP , K2 );
01554           MVT::MvTransMv( ONE, *cH, *cM_HP , M2 );
01555           if (nevLocal_ == threeBlocks) {
01556             MVT::MvTransMv( ONE, *cP, *cK_P, K3 );
01557             MVT::MvTransMv( ONE, *cP, *cM_P, M3 );
01558           }
01559         }
01560       }
01561       // below, we only need bases [X H P] and [H P] and friends
01562       // furthermore, we only need [H P] and friends if fullOrtho == false
01563       // clear the others now
01564       cX   = Teuchos::null;
01565       cH   = Teuchos::null;
01566       cP   = Teuchos::null;
01567       cK_P = Teuchos::null;
01568       cM_P = Teuchos::null;
01569       if (fullOrtho_ == true) {
01570         cHP = Teuchos::null;
01571         cK_HP = Teuchos::null;
01572         cM_HP = Teuchos::null;
01573       }
01574 
01575       //
01576       //---------------------------------------------------
01577       // Perform a spectral decomposition of (KK,MM)
01578       //---------------------------------------------------
01579       //
01580       // Get pointers to relevant part of KK, MM and S for Rayleigh-Ritz analysis
01581       Teuchos::SerialDenseMatrix<int,ScalarType> lclKK(Teuchos::View,KK,nevLocal_,nevLocal_), 
01582                                                  lclMM(Teuchos::View,MM,nevLocal_,nevLocal_),
01583                                                   lclS(Teuchos::View, S,nevLocal_,nevLocal_);
01584       {
01585 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01586         Teuchos::TimeMonitor lcltimer( *timerDS_ );
01587 #endif
01588         int localSize = nevLocal_;
01589         Utils::directSolver(localSize, lclKK, Teuchos::rcpFromRef(lclMM), lclS, theta_, nevLocal_, 0);
01590         // localSize tells directSolver() how big KK,MM are
01591         // however, directSolver() may choose to use only the principle submatrices of KK,MM 
01592         // because of loss of MM-orthogonality in the projected eigenvectors
01593         // nevLocal_ tells us how much it used, telling us the effective localSize 
01594         // (i.e., how much of KK,MM used by directSolver)
01595         // we will not tolerate any indefiniteness, and will throw an exception if it was 
01596         // detected by directSolver
01597         //
01598         if (nevLocal_ != localSize) {
01599           // before throwing the exception, and thereby leaving iterate(), setup the views again
01600           // first, clear the const views
01601           cXHP   = Teuchos::null;
01602           cK_XHP = Teuchos::null;
01603           cM_XHP = Teuchos::null;
01604           cHP    = Teuchos::null;
01605           cK_HP  = Teuchos::null;
01606           cM_HP  = Teuchos::null;
01607           setupViews();
01608         }
01609         TEUCHOS_TEST_FOR_EXCEPTION(nevLocal_ != localSize, LOBPCGRitzFailure, 
01610             "Anasazi::LOBPCG::iterate(): indefiniteness detected in projected mass matrix." );
01611       }
01612 
01613       //
01614       //---------------------------------------------------
01615       // Sort the ritz values using the sort manager
01616       //---------------------------------------------------
01617       Teuchos::LAPACK<int,ScalarType> lapack;
01618       Teuchos::BLAS<int,ScalarType> blas;
01619       {
01620 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01621         Teuchos::TimeMonitor lcltimer( *timerSort_ );
01622 #endif
01623 
01624         std::vector<int> order(nevLocal_);
01625         // 
01626         // Sort the first nevLocal_ values in theta_
01627         sm_->sort(theta_, Teuchos::rcpFromRef(order), nevLocal_);   // don't catch exception
01628         //
01629         // Sort the primitive ritz vectors
01630         Utils::permuteVectors(order,lclS);
01631       }
01632 
01633       //
01634       //----------------------------------------
01635       // Compute coefficients for X and P under [X H P]
01636       //----------------------------------------
01637       // Before computing X,P, optionally perform orthogonalization per Hetmaniuk,Lehoucq paper
01638       // CX will be the coefficients of [X,H,P] for new X, CP for new P
01639       // The paper suggests orthogonalizing CP against CX and orthonormalizing CP, w.r.t. MM
01640       // Here, we will also orthonormalize CX.
01641       // This is accomplished using the Cholesky factorization of [CX CP]^H lclMM [CX CP]
01642       Teuchos::RCP<Teuchos::SerialDenseMatrix<int,ScalarType> > CX, CP;
01643       if (fullOrtho_) {
01644         // build orthonormal basis for (  0  ) that is MM orthogonal to ( S11 )
01645         //                             ( S21 )                          ( S21 )
01646         //                             ( S31 )                          ( S31 )
01647         // Do this using Cholesky factorization of ( S11  0  )
01648         //                                         ( S21 S21 )
01649         //                                         ( S31 S31 )
01650         //           ( S11  0  )
01651         // Build C = ( S21 S21 )
01652         //           ( S31 S31 )
01653         Teuchos::SerialDenseMatrix<int,ScalarType> C(nevLocal_,twoBlocks),
01654                                                 MMC(nevLocal_,twoBlocks),
01655                                                 CMMC(twoBlocks  ,twoBlocks);
01656 
01657         // first block of rows: ( S11 0 )
01658         for (int j=0; j<oneBlock; j++) {
01659           for (int i=0; i<oneBlock; i++) {
01660             // CX
01661             C(i,j) = lclS(i,j);
01662             // CP
01663             C(i,j+oneBlock) = ZERO;
01664           }
01665         }
01666         // second block of rows: (S21 S21)
01667         for (int j=0; j<oneBlock; j++) {
01668           for (int i=oneBlock; i<twoBlocks; i++) {
01669             // CX
01670             C(i,j)          = lclS(i,j);
01671             // CP
01672             C(i,j+oneBlock) = lclS(i,j);
01673           }
01674         }
01675         // third block of rows
01676         if (nevLocal_ == threeBlocks) {
01677           for (int j=0; j<oneBlock; j++) {
01678             for (int i=twoBlocks; i<threeBlocks; i++) {
01679               // CX
01680               C(i,j)          = lclS(i,j);
01681               // CP
01682               C(i,j+oneBlock) = lclS(i,j);
01683             }
01684           }
01685         }
01686 
01687         // compute MMC = lclMM*C
01688         {
01689           int teuchosret;
01690           teuchosret = MMC.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,C,ZERO);
01691           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01692               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01693           // compute CMMC = C^H*MMC == C^H*lclMM*C
01694           teuchosret = CMMC.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,C,MMC,ZERO);
01695           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01696               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01697         }
01698 
01699         // compute R (cholesky) of CMMC
01700         {
01701           int info;
01702           lapack.POTRF('U',twoBlocks,CMMC.values(),CMMC.stride(),&info);
01703           // our views ARE NOT currently in place; we must reestablish them before throwing an exception
01704           if (info != 0) {
01705             // Check symmetry of H'*K*H, this might be the first place a non-Hermitian operator will cause a problem.
01706             Teuchos::SerialDenseMatrix<int,ScalarType> K22(Teuchos::View,KK,blockSize_,blockSize_,1*blockSize_,1*blockSize_);
01707             Teuchos::SerialDenseMatrix<int,ScalarType> K22_trans( K22, Teuchos::CONJ_TRANS );
01708             K22_trans -= K22;
01709             MagnitudeType symNorm = K22_trans.normOne();
01710             MagnitudeType tol = SCT::magnitude(SCT::squareroot(SCT::eps()));
01711 
01712             cXHP = Teuchos::null;
01713             cHP = Teuchos::null;
01714             cK_XHP = Teuchos::null;
01715             cK_HP = Teuchos::null;
01716             cM_XHP = Teuchos::null;
01717             cM_HP = Teuchos::null;
01718             setupViews();
01719            
01720             std::string errMsg = "Anasazi::LOBPCG::iterate(): Cholesky factorization failed during full orthogonalization.";
01721             if ( symNorm < tol )
01722             {
01723               TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
01724             }
01725             else
01726             {
01727               errMsg += " Check the operator A (or K), it may not be Hermitian!";
01728               TEUCHOS_TEST_FOR_EXCEPTION(info != 0, LOBPCGOrthoFailure, errMsg );
01729             }
01730           }
01731         }
01732         // compute C = C inv(R)
01733         blas.TRSM(Teuchos::RIGHT_SIDE,Teuchos::UPPER_TRI,Teuchos::NO_TRANS,Teuchos::NON_UNIT_DIAG,
01734                   nevLocal_,twoBlocks,ONE,CMMC.values(),CMMC.stride(),C.values(),C.stride());
01735         // put C(:,0:oneBlock-1) into CX
01736         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,0) );
01737         // put C(:,oneBlock:twoBlocks-1) into CP
01738         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,C,nevLocal_,oneBlock,0,oneBlock) );
01739 
01740         // check the results
01741         if (om_->isVerbosity( Debug ) ) {
01742           Teuchos::SerialDenseMatrix<int,ScalarType> tmp1(nevLocal_,oneBlock),
01743                                                      tmp2(oneBlock,oneBlock);
01744           MagnitudeType tmp;
01745           int teuchosret;
01746           std::stringstream os;
01747           os.precision(2);
01748           os.setf(std::ios::scientific, std::ios::floatfield);
01749 
01750           os << " Checking Full Ortho: iteration " << iter_ << std::endl;
01751 
01752           // check CX^T MM CX == I
01753           // compute tmp1 = MM*CX
01754           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CX,ZERO);
01755           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01756               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01757           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CX
01758           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01759           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01760               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01761           // subtrace tmp2 - I == CX^H * MM * CX - I
01762           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01763           tmp = tmp2.normFrobenius();          
01764           os << " >> Error in CX^H MM CX == I : " << tmp << std::endl;
01765 
01766           // check CP^T MM CP == I
01767           // compute tmp1 = MM*CP
01768           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01769           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01770               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01771           // compute tmp2 = CP^H*tmp1 == CP^H*MM*CP
01772           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CP,tmp1,ZERO);
01773           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,
01774               "Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01775           // subtrace tmp2 - I == CP^H * MM * CP - I
01776           for (int i=0; i<oneBlock; i++) tmp2(i,i) -= ONE;
01777           tmp = tmp2.normFrobenius();          
01778           os << " >> Error in CP^H MM CP == I : " << tmp << std::endl;
01779 
01780           // check CX^T MM CP == 0
01781           // compute tmp1 = MM*CP
01782           teuchosret = tmp1.multiply(Teuchos::NO_TRANS,Teuchos::NO_TRANS,ONE,lclMM,*CP,ZERO);
01783           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01784           // compute tmp2 = CX^H*tmp1 == CX^H*MM*CP
01785           teuchosret = tmp2.multiply(Teuchos::CONJ_TRANS,Teuchos::NO_TRANS,ONE,*CX,tmp1,ZERO);
01786           TEUCHOS_TEST_FOR_EXCEPTION(teuchosret != 0,std::logic_error,"Anasazi::LOBPCG::iterate(): Logic error calling SerialDenseMatrix::multiply");
01787           // subtrace tmp2 == CX^H * MM * CP
01788           tmp = tmp2.normFrobenius();          
01789           os << " >> Error in CX^H MM CP == 0 : " << tmp << std::endl;
01790 
01791           os << std::endl;
01792           om_->print(Debug,os.str());
01793         }
01794       }
01795       else {
01796         //     [S11 ... ...]
01797         // S = [S21 ... ...]
01798         //     [S31 ... ...]
01799         //
01800         // CX = [S11]
01801         //      [S21]
01802         //      [S31]   ->  X = [X H P] CX
01803         //      
01804         // CP = [S21]   ->  P =   [H P] CP
01805         //      [S31]
01806         //
01807         CX = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_         ,oneBlock,0       ,0) );
01808         CP = Teuchos::rcp( new Teuchos::SerialDenseMatrix<int,ScalarType>(Teuchos::Copy,lclS,nevLocal_-oneBlock,oneBlock,oneBlock,0) );
01809       }
01810 
01811       //
01812       //----------------------------------------
01813       // Compute new X and new P
01814       //----------------------------------------
01815       // Note: Use R as a temporary work space and (if full ortho) tmpMV as well
01816       {
01817 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01818         Teuchos::TimeMonitor lcltimer( *timerLocalUpdate_ );
01819 #endif
01820 
01821         // if full ortho, then CX and CP are dense
01822         // we multiply [X H P]*CX into tmpMV
01823         //             [X H P]*CP into R
01824         // then put V(:,firstblock) <- tmpMV
01825         //          V(:,thirdblock) <- R
01826         //
01827         // if no full ortho, then [H P]*CP doesn't reference first block (X) 
01828         // of V, so that we can modify it before computing P
01829         // so we multiply [X H P]*CX into R
01830         //                V(:,firstblock) <- R
01831         //       multiply [H P]*CP into R
01832         //                V(:,thirdblock) <- R
01833         //
01834         // mutatis mutandis for K[XP] and M[XP]
01835         //
01836         // use SetBlock to do the assignments into V_
01837         //
01838         // in either case, views are only allowed to be overlapping
01839         // if they are const, and it should be assume that SetBlock
01840         // creates a view of the associated part
01841         //
01842         // we have from above const-pointers to [KM]XHP, [KM]HP and (if hasP) [KM]P
01843         //
01844         if (fullOrtho_) {
01845           // X,P
01846           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*tmpmvec_);
01847           MVT::MvTimesMatAddMv(ONE,*cXHP,*CP,ZERO,*R_);
01848           cXHP = Teuchos::null;
01849           MVT::SetBlock(*tmpmvec_,indblock1,*V_);
01850           MVT::SetBlock(*R_      ,indblock3,*V_);
01851           // KX,KP
01852           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*tmpmvec_);
01853           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CP,ZERO,*R_);
01854           cK_XHP = Teuchos::null;
01855           MVT::SetBlock(*tmpmvec_,indblock1,*KV_);
01856           MVT::SetBlock(*R_      ,indblock3,*KV_);
01857           // MX,MP
01858           if (hasM_) {
01859             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*tmpmvec_);
01860             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CP,ZERO,*R_);
01861             cM_XHP = Teuchos::null;
01862             MVT::SetBlock(*tmpmvec_,indblock1,*MV_);
01863             MVT::SetBlock(*R_      ,indblock3,*MV_);
01864           }
01865           else {
01866             cM_XHP = Teuchos::null;
01867           }
01868         }
01869         else {
01870           // X,P
01871           MVT::MvTimesMatAddMv(ONE,*cXHP,*CX,ZERO,*R_);
01872           cXHP = Teuchos::null;
01873           MVT::SetBlock(*R_,indblock1,*V_);
01874           MVT::MvTimesMatAddMv(ONE,*cHP,*CP,ZERO,*R_);
01875           cHP = Teuchos::null;
01876           MVT::SetBlock(*R_,indblock3,*V_);
01877           // KX,KP
01878           MVT::MvTimesMatAddMv(ONE,*cK_XHP,*CX,ZERO,*R_);
01879           cK_XHP = Teuchos::null;
01880           MVT::SetBlock(*R_,indblock1,*KV_);
01881           MVT::MvTimesMatAddMv(ONE,*cK_HP,*CP,ZERO,*R_);
01882           cK_HP = Teuchos::null;
01883           MVT::SetBlock(*R_,indblock3,*KV_);
01884           // MX,MP
01885           if (hasM_) {
01886             MVT::MvTimesMatAddMv(ONE,*cM_XHP,*CX,ZERO,*R_);
01887             cM_XHP = Teuchos::null;
01888             MVT::SetBlock(*R_,indblock1,*MV_);
01889             MVT::MvTimesMatAddMv(ONE,*cM_HP,*CP,ZERO,*R_);
01890             cM_HP = Teuchos::null;
01891             MVT::SetBlock(*R_,indblock3,*MV_);
01892           }
01893           else {
01894             cM_XHP = Teuchos::null;
01895             cM_HP = Teuchos::null;
01896           }
01897         }
01898       } // end timing block
01899       // done with coefficient matrices
01900       CX = Teuchos::null;
01901       CP = Teuchos::null;
01902 
01903       //
01904       // we now have a P direction
01905       hasP_ = true;
01906 
01907       // debugging check: all of our const views should have been cleared by now
01908       // if not, we have a logic error above
01909       TEUCHOS_TEST_FOR_EXCEPTION(   cXHP != Teuchos::null || cK_XHP != Teuchos::null || cM_XHP != Teuchos::null
01910                           || cHP != Teuchos::null ||  cK_HP != Teuchos::null || cM_HP  != Teuchos::null
01911                           ||  cP != Teuchos::null ||   cK_P != Teuchos::null || cM_P   != Teuchos::null,
01912                           std::logic_error,
01913                           "Anasazi::BlockKrylovSchur::iterate(): const views were not all cleared! Something went wrong!" );
01914 
01915       //
01916       // recreate our const MV views of X,H,P and friends
01917       setupViews();
01918 
01919       //
01920       // Compute the new residuals, explicitly
01921       {
01922 #ifdef ANASAZI_TEUCHOS_TIME_MONITOR
01923         Teuchos::TimeMonitor lcltimer( *timerCompRes_ );
01924 #endif
01925         MVT::MvAddMv( ONE, *KX_, ZERO, *KX_, *R_ );
01926         Teuchos::SerialDenseMatrix<int,ScalarType> T( blockSize_, blockSize_ );
01927         for (int i = 0; i < blockSize_; i++) {
01928           T(i,i) = theta_[i];
01929         }
01930         MVT::MvTimesMatAddMv( -ONE, *MX_, T, ONE, *R_ );
01931       }
01932 
01933       // R has been updated; mark the norms as out-of-date
01934       Rnorms_current_ = false;
01935       R2norms_current_ = false;
01936 
01937       // When required, monitor some orthogonalities
01938       if (om_->isVerbosity( Debug ) ) {
01939         // Check almost everything here
01940         CheckList chk;
01941         chk.checkX = true;
01942         chk.checkKX = true;
01943         chk.checkMX = true;
01944         chk.checkP = true;
01945         chk.checkKP = true;
01946         chk.checkMP = true;
01947         chk.checkR = true;
01948         om_->print( Debug, accuracyCheck(chk, ": after local update") );
01949       }
01950       else if (om_->isVerbosity( OrthoDetails )) {
01951         CheckList chk;
01952         chk.checkX = true;
01953         chk.checkP = true;
01954         chk.checkR = true;
01955         om_->print( OrthoDetails, accuracyCheck(chk, ": after local update") );
01956       }
01957     } // end while (statusTest == false)
01958   }
01959 
01960 
01962   // compute/return residual M-norms
01963   template <class ScalarType, class MV, class OP>
01964   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01965   LOBPCG<ScalarType,MV,OP>::getResNorms() {
01966     if (Rnorms_current_ == false) {
01967       // Update the residual norms
01968       orthman_->norm(*R_,Rnorms_);
01969       Rnorms_current_ = true;
01970     }
01971     return Rnorms_;
01972   }
01973 
01974   
01976   // compute/return residual 2-norms
01977   template <class ScalarType, class MV, class OP>
01978   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
01979   LOBPCG<ScalarType,MV,OP>::getRes2Norms() {
01980     if (R2norms_current_ == false) {
01981       // Update the residual 2-norms 
01982       MVT::MvNorm(*R_,R2norms_);
01983       R2norms_current_ = true;
01984     }
01985     return R2norms_;
01986   }
01987 
01988 
01989 
01990 
01992   // Check accuracy, orthogonality, and other debugging stuff
01993   // 
01994   // bools specify which tests we want to run (instead of running more than we actually care about)
01995   //
01996   // we don't bother checking the following because they are computed explicitly:
01997   //    H == Prec*R
01998   //   KH == K*H
01999   //
02000   // 
02001   // checkX : X orthonormal
02002   //          orthogonal to auxvecs
02003   // checkMX: check MX == M*X
02004   // checkKX: check KX == K*X
02005   // checkP : if fullortho P orthonormal and orthogonal to X
02006   //          orthogonal to auxvecs
02007   // checkMP: check MP == M*P
02008   // checkKP: check KP == K*P
02009   // checkH : if fullortho H orthonormal and orthogonal to X and P
02010   //          orthogonal to auxvecs
02011   // checkMH: check MH == M*H
02012   // checkR : check R orthogonal to X
02013   // checkQ : check that auxiliary vectors are actually orthonormal
02014   //
02015   // TODO: 
02016   //  add checkTheta 
02017   //
02018   template <class ScalarType, class MV, class OP>
02019   std::string LOBPCG<ScalarType,MV,OP>::accuracyCheck( const CheckList &chk, const std::string &where ) const 
02020   {
02021     using std::endl;
02022 
02023     std::stringstream os;
02024     os.precision(2);
02025     os.setf(std::ios::scientific, std::ios::floatfield);
02026     MagnitudeType tmp;
02027 
02028     os << " Debugging checks: iteration " << iter_ << where << endl;
02029 
02030     // X and friends
02031     if (chk.checkX && initialized_) {
02032       tmp = orthman_->orthonormError(*X_);
02033       os << " >> Error in X^H M X == I : " << tmp << endl;
02034       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02035         tmp = orthman_->orthogError(*X_,*auxVecs_[i]);
02036         os << " >> Error in X^H M Q[" << i << "] == 0 : " << tmp << endl;
02037       }
02038     }
02039     if (chk.checkMX && hasM_ && initialized_) {
02040       tmp = Utils::errorEquality(*X_, *MX_, MOp_);
02041       os << " >> Error in MX == M*X    : " << tmp << endl;
02042     }
02043     if (chk.checkKX && initialized_) {
02044       tmp = Utils::errorEquality(*X_, *KX_, Op_);
02045       os << " >> Error in KX == K*X    : " << tmp << endl;
02046     }
02047 
02048     // P and friends
02049     if (chk.checkP && hasP_ && initialized_) {
02050       if (fullOrtho_) {
02051         tmp = orthman_->orthonormError(*P_);
02052         os << " >> Error in P^H M P == I : " << tmp << endl;
02053         tmp = orthman_->orthogError(*P_,*X_);
02054         os << " >> Error in P^H M X == 0 : " << tmp << endl;
02055       }
02056       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02057         tmp = orthman_->orthogError(*P_,*auxVecs_[i]);
02058         os << " >> Error in P^H M Q[" << i << "] == 0 : " << tmp << endl;
02059       }
02060     }
02061     if (chk.checkMP && hasM_ && hasP_ && initialized_) {
02062       tmp = Utils::errorEquality(*P_, *MP_, MOp_);
02063       os << " >> Error in MP == M*P    : " << tmp << endl;
02064     }
02065     if (chk.checkKP && hasP_ && initialized_) {
02066       tmp = Utils::errorEquality(*P_, *KP_, Op_);
02067       os << " >> Error in KP == K*P    : " << tmp << endl;
02068     }
02069 
02070     // H and friends
02071     if (chk.checkH && initialized_) {
02072       if (fullOrtho_) {
02073         tmp = orthman_->orthonormError(*H_);
02074         os << " >> Error in H^H M H == I : " << tmp << endl;
02075         tmp = orthman_->orthogError(*H_,*X_);
02076         os << " >> Error in H^H M X == 0 : " << tmp << endl;
02077         if (hasP_) {
02078           tmp = orthman_->orthogError(*H_,*P_);
02079           os << " >> Error in H^H M P == 0 : " << tmp << endl;
02080         }
02081       }
02082       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02083         tmp = orthman_->orthogError(*H_,*auxVecs_[i]);
02084         os << " >> Error in H^H M Q[" << i << "] == 0 : " << tmp << endl;
02085       }
02086     }
02087     if (chk.checkMH && hasM_ && initialized_) {
02088       tmp = Utils::errorEquality(*H_, *MH_, MOp_);
02089       os << " >> Error in MH == M*H    : " << tmp << endl;
02090     }
02091 
02092     // R: this is not M-orthogonality, but standard euclidean orthogonality
02093     if (chk.checkR && initialized_) {
02094       Teuchos::SerialDenseMatrix<int,ScalarType> xTx(blockSize_,blockSize_);
02095       MVT::MvTransMv(ONE,*X_,*R_,xTx);
02096       tmp = xTx.normFrobenius();
02097       MVT::MvTransMv(ONE,*R_,*R_,xTx);
02098       double normR = xTx.normFrobenius();
02099       os << " >> RelError in X^H R == 0: " << tmp/normR << endl;
02100     }
02101 
02102     // Q
02103     if (chk.checkQ) {
02104       for (Array_size_type i=0; i<auxVecs_.size(); i++) {
02105         tmp = orthman_->orthonormError(*auxVecs_[i]);
02106         os << " >> Error in Q[" << i << "]^H M Q[" << i << "] == I : " << tmp << endl;
02107         for (Array_size_type j=i+1; j<auxVecs_.size(); j++) {
02108           tmp = orthman_->orthogError(*auxVecs_[i],*auxVecs_[j]);
02109           os << " >> Error in Q[" << i << "]^H M Q[" << j << "] == 0 : " << tmp << endl;
02110         }
02111       }
02112     }
02113 
02114     os << endl;
02115 
02116     return os.str();
02117   }
02118 
02119 
02121   // Print the current status of the solver
02122   template <class ScalarType, class MV, class OP>
02123   void 
02124   LOBPCG<ScalarType,MV,OP>::currentStatus(std::ostream &os) 
02125   {
02126     using std::endl;
02127 
02128     os.setf(std::ios::scientific, std::ios::floatfield);  
02129     os.precision(6);
02130     os <<endl;
02131     os <<"================================================================================" << endl;
02132     os << endl;
02133     os <<"                              LOBPCG Solver Status" << endl;
02134     os << endl;
02135     os <<"The solver is "<<(initialized_ ? "initialized." : "not initialized.") << endl;
02136     os <<"The number of iterations performed is " << iter_       << endl;
02137     os <<"The current block size is             " << blockSize_  << endl;
02138     os <<"The number of auxiliary vectors is    " << numAuxVecs_ << endl;
02139     os <<"The number of operations Op*x   is " << count_ApplyOp_   << endl;
02140     os <<"The number of operations M*x    is " << count_ApplyM_    << endl;
02141     os <<"The number of operations Prec*x is " << count_ApplyPrec_ << endl;
02142 
02143     os.setf(std::ios_base::right, std::ios_base::adjustfield);
02144 
02145     if (initialized_) {
02146       os << endl;
02147       os <<"CURRENT EIGENVALUE ESTIMATES             "<<endl;
02148       os << std::setw(20) << "Eigenvalue" 
02149          << std::setw(20) << "Residual(M)"
02150          << std::setw(20) << "Residual(2)"
02151          << endl;
02152       os <<"--------------------------------------------------------------------------------"<<endl;
02153       for (int i=0; i<blockSize_; i++) {
02154         os << std::setw(20) << theta_[i];
02155         if (Rnorms_current_) os << std::setw(20) << Rnorms_[i];
02156         else os << std::setw(20) << "not current";
02157         if (R2norms_current_) os << std::setw(20) << R2norms_[i];
02158         else os << std::setw(20) << "not current";
02159         os << endl;
02160       }
02161     }
02162     os <<"================================================================================" << endl;
02163     os << endl;
02164   }
02165 
02167   // are we initialized or not?
02168   template <class ScalarType, class MV, class OP>
02169   bool LOBPCG<ScalarType,MV,OP>::isInitialized() const { 
02170     return initialized_; 
02171   }
02172 
02173 
02175   // is P valid or not?
02176   template <class ScalarType, class MV, class OP>
02177   bool LOBPCG<ScalarType,MV,OP>::hasP() {
02178     return hasP_;
02179   }
02180   
02182   // is full orthogonalization enabled or not?
02183   template <class ScalarType, class MV, class OP>
02184   bool LOBPCG<ScalarType,MV,OP>::getFullOrtho() const { 
02185     return(fullOrtho_); 
02186   }
02187 
02188   
02190   // return the current auxilliary vectors
02191   template <class ScalarType, class MV, class OP>
02192   Teuchos::Array<Teuchos::RCP<const MV> > LOBPCG<ScalarType,MV,OP>::getAuxVecs() const {
02193     return auxVecs_;
02194   }
02195 
02197   // return the current block size
02198   template <class ScalarType, class MV, class OP>
02199   int LOBPCG<ScalarType,MV,OP>::getBlockSize() const {
02200     return(blockSize_); 
02201   }
02202 
02204   // return the current eigenproblem
02205   template <class ScalarType, class MV, class OP>
02206   const Eigenproblem<ScalarType,MV,OP>& LOBPCG<ScalarType,MV,OP>::getProblem() const { 
02207     return(*problem_); 
02208   }
02209 
02210   
02212   // return the max subspace dimension
02213   template <class ScalarType, class MV, class OP>
02214   int LOBPCG<ScalarType,MV,OP>::getMaxSubspaceDim() const {
02215     return 3*blockSize_;
02216   }
02217 
02219   // return the current subspace dimension
02220   template <class ScalarType, class MV, class OP>
02221   int LOBPCG<ScalarType,MV,OP>::getCurSubspaceDim() const {
02222     if (!initialized_) return 0;
02223     return nevLocal_;
02224   }
02225 
02226   
02228   // return the current ritz residual norms
02229   template <class ScalarType, class MV, class OP>
02230   std::vector<typename Teuchos::ScalarTraits<ScalarType>::magnitudeType> 
02231   LOBPCG<ScalarType,MV,OP>::getRitzRes2Norms() 
02232   {
02233     return this->getRes2Norms();
02234   }
02235 
02236   
02238   // return the current compression indices
02239   template <class ScalarType, class MV, class OP>
02240   std::vector<int> LOBPCG<ScalarType,MV,OP>::getRitzIndex() {
02241     std::vector<int> ret(nevLocal_,0);
02242     return ret;
02243   }
02244 
02245   
02247   // return the current ritz values
02248   template <class ScalarType, class MV, class OP>
02249   std::vector<Value<ScalarType> > LOBPCG<ScalarType,MV,OP>::getRitzValues() { 
02250     std::vector<Value<ScalarType> > ret(nevLocal_);
02251     for (int i=0; i<nevLocal_; i++) {
02252       ret[i].realpart = theta_[i];
02253       ret[i].imagpart = ZERO;
02254     }
02255     return ret;
02256   }
02257 
02259   // Set a new StatusTest for the solver.
02260   template <class ScalarType, class MV, class OP>
02261   void LOBPCG<ScalarType,MV,OP>::setStatusTest(Teuchos::RCP<StatusTest<ScalarType,MV,OP> > test) {
02262     TEUCHOS_TEST_FOR_EXCEPTION(test == Teuchos::null,std::invalid_argument,
02263         "Anasazi::LOBPCG::setStatusTest() was passed a null StatusTest.");
02264     tester_ = test;
02265   }
02266 
02268   // Get the current StatusTest used by the solver.
02269   template <class ScalarType, class MV, class OP>
02270   Teuchos::RCP<StatusTest<ScalarType,MV,OP> > LOBPCG<ScalarType,MV,OP>::getStatusTest() const {
02271     return tester_;
02272   }
02273   
02275   // return the current ritz vectors
02276   template <class ScalarType, class MV, class OP>
02277   Teuchos::RCP<const MV> LOBPCG<ScalarType,MV,OP>::getRitzVectors() {
02278     return X_;
02279   }
02280 
02281   
02283   // reset the iteration counter
02284   template <class ScalarType, class MV, class OP>
02285   void LOBPCG<ScalarType,MV,OP>::resetNumIters() {
02286     iter_=0; 
02287   }
02288 
02289   
02291   // return the number of iterations
02292   template <class ScalarType, class MV, class OP>
02293   int LOBPCG<ScalarType,MV,OP>::getNumIters() const { 
02294     return(iter_); 
02295   }
02296 
02297   
02299   // return the state
02300   template <class ScalarType, class MV, class OP>
02301   LOBPCGState<ScalarType,MV> LOBPCG<ScalarType,MV,OP>::getState() const {
02302     LOBPCGState<ScalarType,MV> state;
02303     state.V = V_;
02304     state.KV = KV_;
02305     state.X = X_;
02306     state.KX = KX_;
02307     state.P = P_;
02308     state.KP = KP_;
02309     state.H = H_;
02310     state.KH = KH_;
02311     state.R = R_;
02312     state.T = Teuchos::rcp(new std::vector<MagnitudeType>(theta_));
02313     if (hasM_) {
02314       state.MV = MV_;
02315       state.MX = MX_;
02316       state.MP = MP_;
02317       state.MH = MH_;
02318     }
02319     else {
02320       state.MX = Teuchos::null;
02321       state.MP = Teuchos::null;
02322       state.MH = Teuchos::null;
02323     }
02324     return state;
02325   }
02326 
02327 } // end Anasazi namespace
02328 
02329 #endif // ANASAZI_LOBPCG_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends