Anasazi Version of the Day
AnasaziTsqrOrthoManagerImpl.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00032 #ifndef __AnasaziTsqrOrthoManagerImpl_hpp
00033 #define __AnasaziTsqrOrthoManagerImpl_hpp
00034 
00035 #include "AnasaziConfigDefs.hpp"
00036 #include "AnasaziMultiVecTraits.hpp"
00037 #include "AnasaziOrthoManager.hpp" // OrthoError, etc.
00038 
00039 #include "Teuchos_as.hpp"
00040 #include "Teuchos_LAPACK.hpp"
00041 #include "Teuchos_ParameterList.hpp"
00042 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
00043 #include <algorithm>
00044 
00045 
00046 namespace Anasazi {
00047 
00051   class TsqrOrthoError : public OrthoError {
00052   public: 
00053     TsqrOrthoError (const std::string& what_arg) : 
00054       OrthoError (what_arg) {}
00055   };
00056 
00076   class TsqrOrthoFault : public OrthoError {
00077   public: 
00078     TsqrOrthoFault (const std::string& what_arg) : 
00079       OrthoError (what_arg) {}
00080   };
00081 
00114   template<class Scalar, class MV>
00115   class TsqrOrthoManagerImpl :
00116     public Teuchos::ParameterListAcceptorDefaultBase {
00117   public:
00118     typedef Scalar scalar_type;
00119     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00120     typedef MV multivector_type;
00123     typedef Teuchos::SerialDenseMatrix<int, Scalar> mat_type;
00124     typedef Teuchos::RCP<mat_type> mat_ptr;
00125 
00126   private:
00127     typedef Teuchos::ScalarTraits<Scalar> SCT;
00128     typedef Teuchos::ScalarTraits<magnitude_type> SCTM;
00129     typedef MultiVecTraits<Scalar, MV> MVT;
00130     typedef MultiVecTraitsExt<Scalar, MV> MVText;
00131     typedef typename MVT::tsqr_adaptor_type tsqr_adaptor_type;
00132 
00133   public:
00141     Teuchos::RCP<const Teuchos::ParameterList> getValidParameters () const;
00142 
00144     void setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params);
00145 
00156     Teuchos::RCP<const Teuchos::ParameterList> getFastParameters ();
00157 
00174     TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00175                           const std::string& label);
00176 
00181     TsqrOrthoManagerImpl (const std::string& label);
00182 
00190     void setLabel (const std::string& label) { 
00191       if (label != label_) {
00192         label_ = label; 
00193       }
00194     }
00195 
00197     const std::string& getLabel () const { return label_; }
00198 
00207     void 
00208     innerProd (const MV& X, const MV& Y, mat_type& Z) const
00209     {
00210       MVT::MvTransMv (SCT::one(), X, Y, Z);
00211     }
00212 
00230     void
00231     norm (const MV& X, std::vector<magnitude_type>& normvec) const;
00232 
00242     void 
00243     project (MV& X, 
00244              Teuchos::Array<mat_ptr> C, 
00245              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00246 
00260     int normalize (MV& X, mat_ptr B);
00261 
00280     int 
00281     normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B);
00282 
00296     int 
00297     projectAndNormalize (MV &X,
00298                          Teuchos::Array<mat_ptr> C,
00299                          mat_ptr B,
00300                          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00301     {
00302       // "false" means we work on X in place.  The second argument is
00303       // not read or written in that case.
00304       return projectAndNormalizeImpl (X, X, false, C, B, Q);
00305     }
00306 
00326     int 
00327     projectAndNormalizeOutOfPlace (MV& X_in, 
00328                                    MV& X_out,
00329                                    Teuchos::Array<mat_ptr> C,
00330                                    mat_ptr B,
00331                                    Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00332     {
00333       // "true" means we work on X_in out of place, writing the
00334       // results into X_out.
00335       return projectAndNormalizeImpl (X_in, X_out, true, C, B, Q);
00336     }
00337 
00342     magnitude_type
00343     orthonormError (const MV &X) const
00344     {
00345       const Scalar ONE = SCT::one();
00346       const int ncols = MVT::GetNumberVecs(X);
00347       mat_type XTX (ncols, ncols);
00348       innerProd (X, X, XTX);
00349       for (int k = 0; k < ncols; ++k) {
00350         XTX(k,k) -= ONE;
00351       }
00352       return XTX.normFrobenius();
00353     }
00354 
00356     magnitude_type 
00357     orthogError (const MV &X1, 
00358                  const MV &X2) const
00359     {
00360       const int ncols_X1 = MVT::GetNumberVecs (X1);
00361       const int ncols_X2 = MVT::GetNumberVecs (X2);
00362       mat_type X1_T_X2 (ncols_X1, ncols_X2);
00363       innerProd (X1, X2, X1_T_X2);
00364       return X1_T_X2.normFrobenius();
00365     }
00366 
00370     magnitude_type blockReorthogThreshold() const { return blockReorthogThreshold_; }
00371 
00374     magnitude_type relativeRankTolerance() const { return relativeRankTolerance_; }
00375 
00376   private:
00378     Teuchos::RCP<Teuchos::ParameterList> params_;
00379 
00381     mutable Teuchos::RCP<const Teuchos::ParameterList> defaultParams_;
00382 
00384     std::string label_;
00385 
00387     tsqr_adaptor_type tsqrAdaptor_;
00388 
00398     Teuchos::RCP<MV> Q_;
00399 
00401     magnitude_type eps_;
00402 
00403 
00407     bool randomizeNullSpace_;
00408 
00414     bool reorthogonalizeBlocks_;
00415 
00419     bool throwOnReorthogFault_;
00420 
00422     magnitude_type blockReorthogThreshold_;
00423 
00425     magnitude_type relativeRankTolerance_;
00426 
00433     bool forceNonnegativeDiagonal_;
00434 
00436     void
00437     raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
00438                         const std::vector<magnitude_type>& normsAfterSecondPass,
00439                         const std::vector<int>& faultIndices);
00440 
00450     void
00451     checkProjectionDims (int& ncols_X, 
00452                          int& num_Q_blocks,
00453                          int& ncols_Q_total,
00454                          const MV& X, 
00455                          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const;
00456 
00467     void
00468     allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
00469                                     Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00470                                     const MV& X,
00471                                     const bool attemptToRecycle = true) const;
00472 
00481     int 
00482     projectAndNormalizeImpl (MV& X_in, 
00483                              MV& X_out,
00484                              const bool outOfPlace,
00485                              Teuchos::Array<mat_ptr> C,
00486                              mat_ptr B,
00487                              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00488 
00494     void
00495     rawProject (MV& X, 
00496                 Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00497                 Teuchos::ArrayView<mat_ptr> C) const;
00498 
00500     void
00501     rawProject (MV& X, 
00502                 const Teuchos::RCP<const MV>& Q, 
00503                 const mat_ptr& C) const;
00504 
00531     int rawNormalize (MV& X, MV& Q, mat_type& B);
00532 
00550     int normalizeOne (MV& X, mat_ptr B) const;
00551 
00579     int normalizeImpl (MV& X, MV& Q, mat_ptr B, const bool outOfPlace);
00580   };
00581 
00582   template<class Scalar, class MV>
00583   void
00584   TsqrOrthoManagerImpl<Scalar, MV>::
00585   setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
00586   {
00587     using Teuchos::ParameterList;
00588     using Teuchos::parameterList;
00589     using Teuchos::RCP;
00590     using Teuchos::sublist;
00591     typedef magnitude_type M; // abbreviation.
00592 
00593     RCP<const ParameterList> defaultParams = getValidParameters ();
00594     // Sublist of TSQR implementation parameters; to get below.
00595     RCP<ParameterList> tsqrParams;
00596 
00597     RCP<ParameterList> theParams;
00598     if (params.is_null()) {
00599       theParams = parameterList (*defaultParams);
00600     } else {
00601       theParams = params;
00602 
00603       // Don't call validateParametersAndSetDefaults(); we prefer to
00604       // ignore parameters that we don't recognize, at least for now.
00605       // However, we do fill in missing parameters with defaults.
00606 
00607       randomizeNullSpace_ = 
00608         theParams->get<bool> ("randomizeNullSpace", 
00609                               defaultParams->get<bool> ("randomizeNullSpace"));
00610       reorthogonalizeBlocks_ = 
00611         theParams->get<bool> ("reorthogonalizeBlocks", 
00612                               defaultParams->get<bool> ("reorthogonalizeBlocks"));
00613       throwOnReorthogFault_ = 
00614         theParams->get<bool> ("throwOnReorthogFault", 
00615                               defaultParams->get<bool> ("throwOnReorthogFault"));
00616       blockReorthogThreshold_ = 
00617         theParams->get<M> ("blockReorthogThreshold",
00618                            defaultParams->get<M> ("blockReorthogThreshold"));
00619       relativeRankTolerance_ = 
00620         theParams->get<M> ("relativeRankTolerance", 
00621                            defaultParams->get<M> ("relativeRankTolerance"));
00622       forceNonnegativeDiagonal_ = 
00623         theParams->get<bool> ("forceNonnegativeDiagonal", 
00624                               defaultParams->get<bool> ("forceNonnegativeDiagonal"));
00625 
00626       // Get the sublist of TSQR implementation parameters.  Use the
00627       // default sublist if one isn't provided.
00628       if (! theParams->isSublist ("TSQR implementation")) {
00629         theParams->set ("TSQR implementation", 
00630                         defaultParams->sublist ("TSQR implementation"));
00631       }
00632       tsqrParams = sublist (theParams, "TSQR implementation", true);
00633     }
00634 
00635     // Send the TSQR implementation parameters to the TSQR adaptor.
00636     tsqrAdaptor_.setParameterList (tsqrParams);
00637 
00638     // Save the input parameter list.
00639     setMyParamList (theParams);
00640   }
00641 
00642   template<class Scalar, class MV>
00643   TsqrOrthoManagerImpl<Scalar, MV>::
00644   TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00645                         const std::string& label) :
00646     label_ (label),
00647     Q_ (Teuchos::null),               // Initialized on demand
00648     eps_ (SCTM::eps()),               // Machine precision
00649     randomizeNullSpace_ (true),
00650     reorthogonalizeBlocks_ (true),
00651     throwOnReorthogFault_ (false),
00652     blockReorthogThreshold_ (0),
00653     relativeRankTolerance_ (0),
00654     forceNonnegativeDiagonal_ (false)
00655   {
00656     setParameterList (params); // This also sets tsqrAdaptor_'s parameters.
00657   }
00658 
00659   template<class Scalar, class MV>
00660   TsqrOrthoManagerImpl<Scalar, MV>::
00661   TsqrOrthoManagerImpl (const std::string& label) :
00662     label_ (label),
00663     Q_ (Teuchos::null),               // Initialized on demand
00664     eps_ (SCTM::eps()),               // Machine precision
00665     randomizeNullSpace_ (true),
00666     reorthogonalizeBlocks_ (true),
00667     throwOnReorthogFault_ (false),
00668     blockReorthogThreshold_ (0),
00669     relativeRankTolerance_ (0), 
00670     forceNonnegativeDiagonal_ (false) 
00671   {
00672     setParameterList (Teuchos::null); // Set default parameters.
00673   }
00674 
00675   template<class Scalar, class MV>
00676   void
00677   TsqrOrthoManagerImpl<Scalar, MV>::
00678   norm (const MV& X, std::vector<magnitude_type>& normVec) const
00679   {
00680     const int numCols = MVT::GetNumberVecs (X);
00681     // std::vector<T>::size_type is unsigned; int is signed.  Mixed
00682     // unsigned/signed comparisons trigger compiler warnings.
00683     if (normVec.size() < static_cast<size_t>(numCols))
00684       normVec.resize (numCols); // Resize normvec if necessary.
00685     MVT::MvNorm (X, normVec);
00686   }
00687 
00688   template<class Scalar, class MV>
00689   void
00690   TsqrOrthoManagerImpl<Scalar, MV>::project (MV& X, 
00691                                              Teuchos::Array<mat_ptr> C,
00692                                              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00693   {
00694     int ncols_X, num_Q_blocks, ncols_Q_total;
00695     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X, Q);
00696     // Test for quick exit: any dimension of X is zero, or there are
00697     // zero Q blocks, or the total number of columns of the Q blocks
00698     // is zero.
00699     if (ncols_X == 0 || num_Q_blocks == 0 || ncols_Q_total == 0)
00700       return;
00701 
00702     // Make space for first-pass projection coefficients
00703     allocateProjectionCoefficients (C, Q, X, true);
00704 
00705     // We only use columnNormsBefore and compute pre-projection column
00706     // norms if doing block reorthogonalization.
00707     std::vector<magnitude_type> columnNormsBefore (ncols_X, magnitude_type(0));
00708     if (reorthogonalizeBlocks_)
00709       MVT::MvNorm (X, columnNormsBefore);
00710 
00711     // Project (first block orthogonalization step): 
00712     // C := Q^* X, X := X - Q C.
00713     rawProject (X, Q, C); 
00714 
00715     // If we are doing block reorthogonalization, reorthogonalize X if
00716     // necessary.
00717     if (reorthogonalizeBlocks_)
00718       {
00719         std::vector<magnitude_type> columnNormsAfter (ncols_X, magnitude_type(0));
00720         MVT::MvNorm (X, columnNormsAfter);
00721         
00722         // Relative block reorthogonalization threshold
00723         const magnitude_type relThres = blockReorthogThreshold();
00724         // Reorthogonalize X if any of its column norms decreased by a
00725         // factor more than the block reorthogonalization threshold.
00726         // Don't bother trying to subset the columns; that will make
00727         // the columns noncontiguous and thus hinder BLAS 3
00728         // optimizations.
00729         bool reorthogonalize = false;
00730         for (int j = 0; j < ncols_X; ++j)
00731           if (columnNormsAfter[j] < relThres * columnNormsBefore[j])
00732             {
00733               reorthogonalize = true;
00734               break;
00735             }
00736         if (reorthogonalize)
00737           {
00738             // Second-pass projection coefficients
00739             Teuchos::Array<mat_ptr> C2;
00740             allocateProjectionCoefficients (C2, Q, X, false);
00741 
00742             // Perform the second projection pass:
00743             // C2 = Q' X, X = X - Q*C2
00744             rawProject (X, Q, C2);
00745             // Update the projection coefficients
00746             for (int k = 0; k < num_Q_blocks; ++k)
00747               *C[k] += *C2[k];
00748           }
00749       }
00750   }
00751 
00752 
00753 
00754   template<class Scalar, class MV>
00755   int 
00756   TsqrOrthoManagerImpl<Scalar, MV>::normalize (MV& X, mat_ptr B)
00757   {
00758     using Teuchos::Range1D;
00759     using Teuchos::RCP;
00760 
00761     // MVT returns int for this, even though the "local ordinal
00762     // type" of the MV may be some other type (for example,
00763     // Tpetra::MultiVector<double, int32_t, int64_t, ...>).
00764     const int numCols = MVT::GetNumberVecs (X);
00765 
00766     // This special case (for X having only one column) makes
00767     // TsqrOrthoManagerImpl equivalent to Modified Gram-Schmidt
00768     // orthogonalization with conditional full reorthogonalization,
00769     // if all multivector inputs have only one column.  It also
00770     // avoids allocating Q_ scratch space and copying data when we
00771     // don't need to invoke TSQR at all.
00772     if (numCols == 1) {
00773       return normalizeOne (X, B);
00774     }
00775 
00776     // We use Q_ as scratch space for the normalization, since TSQR
00777     // requires a scratch multivector (it can't factor in place).  Q_
00778     // should come from a vector space compatible with X's vector
00779     // space, and Q_ should have at least as many columns as X.
00780     // Otherwise, we have to reallocate.  We also have to allocate
00781     // (not "re-") Q_ if we haven't allocated it before.  (We can't
00782     // allocate Q_ until we have some X, so we need a multivector as
00783     // the "prototype.")
00784     //
00785     // NOTE (mfh 11 Jan 2011) We only increase the number of columsn
00786     // in Q_, never decrease.  This is OK for typical uses of TSQR,
00787     // but you might prefer different behavior in some cases.
00788     //
00789     // NOTE (mfh 10 Mar 2011) We should only reuse the scratch space
00790     // Q_ if X and Q_ have compatible data distributions.  However,
00791     // Belos' current MultiVecTraits interface does not let us check
00792     // for this.  Thus, we can only check whether X and Q_ have the
00793     // same number of rows.  This will behave correctly for the common
00794     // case in Belos that all multivectors with the same number of
00795     // rows have the same data distribution.
00796     //
00797     // The specific MV implementation may do more checks than this on
00798     // its own and throw an exception if X and Q_ are not compatible,
00799     // but it may not.  If you find that recycling the Q_ space causes
00800     // troubles, you may consider modifying the code below to
00801     // reallocate Q_ for every X that comes in.  
00802     if (Q_.is_null() || 
00803         MVText::GetGlobalLength(*Q_) != MVText::GetGlobalLength(X) ||
00804         numCols > MVT::GetNumberVecs (*Q_)) {
00805       Q_ = MVT::Clone (X, numCols);
00806     }
00807 
00808     // normalizeImpl() wants the second MV argument to have the same
00809     // number of columns as X.  To ensure this, we pass it a view of
00810     // Q_ if Q_ has more columns than X.  (This is possible if we
00811     // previously called normalize() with a different multivector,
00812     // since we never reallocate Q_ if it has more columns than
00813     // necessary.)
00814     if (MVT::GetNumberVecs(*Q_) == numCols) {
00815       return normalizeImpl (X, *Q_, B, false);
00816     } else {
00817       RCP<MV> Q_view = MVT::CloneViewNonConst (*Q_, Range1D(0, numCols-1));
00818       return normalizeImpl (X, *Q_view, B, false);
00819     }
00820   }
00821 
00822   template<class Scalar, class MV>
00823   void
00824   TsqrOrthoManagerImpl<Scalar, MV>::
00825   allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
00826                                   Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00827                                   const MV& X,
00828                                   const bool attemptToRecycle) const
00829   {
00830     const int num_Q_blocks = Q.size();
00831     const int ncols_X = MVT::GetNumberVecs (X);
00832     C.resize (num_Q_blocks);
00833     if (attemptToRecycle)
00834       {
00835         for (int i = 0; i < num_Q_blocks; ++i) 
00836           {
00837             const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
00838             // Create a new C[i] if necessary, otherwise resize if
00839             // necessary, otherwise fill with zeros.
00840             if (C[i].is_null())
00841               C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
00842             else 
00843               {
00844                 mat_type& Ci = *C[i];
00845                 if (Ci.numRows() != ncols_Qi || Ci.numCols() != ncols_X)
00846                   Ci.shape (ncols_Qi, ncols_X);
00847                 else
00848                   Ci.putScalar (SCT::zero());
00849               }
00850           }
00851       }
00852     else
00853       {
00854         for (int i = 0; i < num_Q_blocks; ++i) 
00855           {
00856             const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
00857             C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
00858           }
00859       }
00860   }
00861 
00862   template<class Scalar, class MV>
00863   int 
00864   TsqrOrthoManagerImpl<Scalar, MV>::
00865   normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B)
00866   {
00867     const int numVecs = MVT::GetNumberVecs(X);
00868     if (numVecs == 0) {
00869       return 0; // Nothing to do.
00870     } else if (numVecs == 1) {
00871       // Special case for a single column; scale and copy over.
00872       using Teuchos::Range1D;
00873       using Teuchos::RCP;
00874       using Teuchos::rcp;
00875 
00876       // Normalize X in place (faster than TSQR for one column).
00877       const int rank = normalizeOne (X, B);
00878       // Copy results to first column of Q.
00879       RCP<MV> Q_0 = MVT::CloneViewNonConst (Q, Range1D(0,0));
00880       MVT::Assign (X, *Q_0);
00881       return rank;
00882     } else {
00883       // The "true" argument to normalizeImpl() means the output
00884       // vectors go into Q, and the contents of X are overwritten with
00885       // invalid values.
00886       return normalizeImpl (X, Q, B, true);
00887     }
00888   }
00889 
00890   template<class Scalar, class MV>
00891   int 
00892   TsqrOrthoManagerImpl<Scalar, MV>::
00893   projectAndNormalizeImpl (MV& X_in, 
00894                            MV& X_out, // Only written if outOfPlace==false.
00895                            const bool outOfPlace,
00896                            Teuchos::Array<mat_ptr> C,
00897                            mat_ptr B,
00898                            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00899   {
00900     using Teuchos::Range1D;
00901     using Teuchos::RCP;
00902     using Teuchos::rcp;
00903 
00904     if (outOfPlace) {
00905       // Make sure that X_out has at least as many columns as X_in.
00906       TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(X_out) < MVT::GetNumberVecs(X_in),
00907                          std::invalid_argument, 
00908                          "Belos::TsqrOrthoManagerImpl::"
00909                          "projectAndNormalizeOutOfPlace(...):"
00910                          "X_out has " << MVT::GetNumberVecs(X_out) 
00911                          << " columns, but X_in has "
00912                          << MVT::GetNumberVecs(X_in) << " columns.");
00913     }
00914     // Fetch dimensions of X_in and Q, and allocate space for first-
00915     // and second-pass projection coefficients (C resp. C2).
00916     int ncols_X, num_Q_blocks, ncols_Q_total;
00917     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X_in, Q);
00918 
00919     // Test for quick exit: if any dimension of X is zero.
00920     if (ncols_X == 0) {
00921       return 0;
00922     }
00923     // If there are zero Q blocks or zero Q columns, just normalize!
00924     if (num_Q_blocks == 0 || ncols_Q_total == 0) {
00925       if (outOfPlace) {
00926         return normalizeOutOfPlace (X_in, X_out, B);
00927       } else {
00928         return normalize (X_in, B);
00929       }
00930     }
00931 
00932     // The typical case is that the entries of C have been allocated
00933     // before, so we attempt to recycle the allocations.  The call
00934     // below will reallocate if it cannot recycle.
00935     allocateProjectionCoefficients (C, Q, X_in, true);
00936 
00937     // If we are doing block reorthogonalization, then compute the
00938     // column norms of X before projecting for the first time.  This
00939     // will help us decide whether we need to reorthogonalize X.
00940     std::vector<magnitude_type> normsBeforeFirstPass (ncols_X, SCTM::zero());
00941     if (reorthogonalizeBlocks_) {
00942       MVT::MvNorm (X_in, normsBeforeFirstPass);
00943     }
00944 
00945     // First (Modified) Block Gram-Schmidt pass, in place in X_in.
00946     rawProject (X_in, Q, C);
00947 
00948     // Make space for the normalization coefficients.  This will
00949     // either be a freshly allocated matrix (if B is null), or a view
00950     // of the appropriately sized upper left submatrix of *B (if B is
00951     // not null).
00952     //
00953     // Note that if we let the normalize() routine allocate (in the
00954     // case that B is null), that storage will go away at the end of
00955     // normalize().  (This is because it passes the RCP by value, not
00956     // by reference.)
00957     mat_ptr B_out;
00958     if (B.is_null()) {
00959       B_out = rcp (new mat_type (ncols_X, ncols_X));
00960     } else {
00961       // Make sure that B is no smaller than numCols x numCols.
00962       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < ncols_X || B->numCols() < ncols_X,
00963                          std::invalid_argument,
00964                          "normalizeOne: Input matrix B must be at "
00965                          "least " << ncols_X << " x " << ncols_X 
00966                          << ", but is instead " << B->numRows()
00967                          << " x " << B->numCols() << ".");
00968       // Create a view of the ncols_X by ncols_X upper left
00969       // submatrix of *B.  TSQR will write the normalization
00970       // coefficients there.
00971       B_out = rcp (new mat_type (Teuchos::View, *B, ncols_X, ncols_X));
00972     }
00973 
00974     // Rank of X(_in) after first projection pass.  If outOfPlace,
00975     // this overwrites X_in with invalid values, and the results go in
00976     // X_out.  Otherwise, it's done in place in X_in.
00977     const int firstPassRank = outOfPlace ? 
00978       normalizeOutOfPlace (X_in, X_out, B_out) : 
00979       normalize (X_in, B_out);
00980     if (B.is_null()) {
00981       // The input matrix B is null, so assign B_out to it.  If B was
00982       // not null on input, then B_out is a view of *B, so we don't
00983       // have to do anything here.  Note that SerialDenseMatrix uses
00984       // raw pointers to store data and represent views, so we have to
00985       // be careful about scope.
00986       B = B_out;
00987     }
00988     int rank = firstPassRank; // Current rank of X.
00989 
00990     // If X was not full rank after projection and randomizeNullSpace_
00991     // is true, then normalize(OutOfPlace)() replaced the null space
00992     // basis of X with random vectors, and orthogonalized them against
00993     // the column space basis of X.  However, we still need to
00994     // orthogonalize the random vectors against the Q[i], after which
00995     // we will need to renormalize them.
00996     //
00997     // If outOfPlace, then we need to work in X_out (where
00998     // normalizeOutOfPlace() wrote the normalized vectors).
00999     // Otherwise, we need to work in X_in.
01000     //
01001     // Note: We don't need to keep the new projection coefficients,
01002     // since they are multiplied by the "small" part of B
01003     // corresponding to the null space of the original X.
01004     if (firstPassRank < ncols_X && randomizeNullSpace_) {
01005       const int numNullSpaceCols = ncols_X - firstPassRank;
01006       const Range1D nullSpaceIndices (firstPassRank, ncols_X - 1);
01007 
01008       // Space for projection coefficients (will be thrown away)
01009       Teuchos::Array<mat_ptr> C_null (num_Q_blocks);
01010       for (int k = 0; k < num_Q_blocks; ++k) {
01011         const int numColsQk = MVT::GetNumberVecs(*Q[k]);
01012         C_null[k] = rcp (new mat_type (numColsQk, numNullSpaceCols));
01013       }
01014       // Space for normalization coefficients (will be thrown away).
01015       RCP<mat_type> B_null (new mat_type (numNullSpaceCols, numNullSpaceCols));
01016 
01017       int randomVectorsRank;
01018       if (outOfPlace) {
01019         // View of the null space basis columns of X.
01020         // normalizeOutOfPlace() wrote them into X_out.
01021         RCP<MV> X_out_null = MVT::CloneViewNonConst (X_out, nullSpaceIndices);
01022         // Use X_in for scratch space.  Copy X_out_null into the
01023         // last few columns of X_in (X_in_null) and do projections
01024         // in there.  (This saves us a copy wen we renormalize
01025         // (out of place) back into X_out.)
01026         RCP<MV> X_in_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01027         MVT::Assign (*X_out_null, *X_in_null);
01028         // Project the new random vectors against the Q blocks, and
01029         // renormalize the result into X_out_null.  
01030         rawProject (*X_in_null, Q, C_null);
01031         randomVectorsRank = normalizeOutOfPlace (*X_in_null, *X_out_null, B_null);
01032       } else {
01033         // View of the null space columns of X.  
01034         // They live in X_in.
01035         RCP<MV> X_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01036         // Project the new random vectors against the Q blocks,
01037         // and renormalize the result (in place).
01038         rawProject (*X_null, Q, C_null);
01039         randomVectorsRank = normalize (*X_null, B_null);
01040       }
01041       // While unusual, it is still possible for the random data not
01042       // to be full rank after projection and normalization.  In that
01043       // case, we could try another set of random data and recurse as
01044       // necessary, but instead for now we just raise an exception.
01045       TEUCHOS_TEST_FOR_EXCEPTION(randomVectorsRank != numNullSpaceCols, 
01046                          TsqrOrthoError, 
01047                          "Belos::TsqrOrthoManagerImpl::projectAndNormalize"
01048                          "OutOfPlace(): After projecting and normalizing the "
01049                          "random vectors (used to replace the null space "
01050                          "basis vectors from normalizing X), they have rank "
01051                          << randomVectorsRank << ", but should have full "
01052                          "rank " << numNullSpaceCols << ".");
01053     }
01054 
01055     // Whether or not X_in was full rank after projection, we still
01056     // might want to reorthogonalize against Q.
01057     if (reorthogonalizeBlocks_) {
01058       // We are only interested in the column space basis of X
01059       // resp. X_out.
01060       std::vector<magnitude_type> 
01061         normsAfterFirstPass (firstPassRank, SCTM::zero());
01062       std::vector<magnitude_type> 
01063         normsAfterSecondPass (firstPassRank, SCTM::zero());
01064 
01065       // Compute post-first-pass (pre-normalization) norms.  We could
01066       // have done that using MVT::MvNorm() on X_in after projecting,
01067       // but before the first normalization.  However, that operation
01068       // may be expensive.  It is also unnecessary: after calling
01069       // normalize(), the 2-norm of B(:,j) is the 2-norm of X_in(:,j)
01070       // before normalization, in exact arithmetic.
01071       //
01072       // NOTE (mfh 06 Nov 2010) This is one way that combining
01073       // projection and normalization into a single kernel --
01074       // projectAndNormalize() -- pays off.  In project(), we have to
01075       // compute column norms of X before and after projection.  Here,
01076       // we get them for free from the normalization coefficients.
01077       Teuchos::BLAS<int, Scalar> blas;
01078       for (int j = 0; j < firstPassRank; ++j) {
01079         const Scalar* const B_j = &(*B_out)(0,j);
01080         // Teuchos::BLAS::NRM2 returns a magnitude_type result on
01081         // Scalar inputs.
01082         normsAfterFirstPass[j] = blas.NRM2 (firstPassRank, B_j, 1);
01083       }
01084       // Test whether any of the norms dropped below the
01085       // reorthogonalization threshold.
01086       bool reorthogonalize = false;
01087       for (int j = 0; j < firstPassRank; ++j) {
01088         // If any column's norm decreased too much, mark this block
01089         // for reorthogonalization.  Note that this test will _not_
01090         // activate reorthogonalization if a column's norm before the
01091         // first project-and-normalize step was zero.  It _will_
01092         // activate reorthogonalization if the column's norm before
01093         // was not zero, but is zero now.
01094         const magnitude_type curThreshold = 
01095           blockReorthogThreshold() * normsBeforeFirstPass[j];
01096         if (normsAfterFirstPass[j] < curThreshold) {
01097           reorthogonalize = true; 
01098           break;
01099         }
01100       }
01101       // Perform another Block Gram-Schmidt pass if necessary.  "Twice
01102       // is enough" (Kahan's theorem) for a Krylov method, unless
01103       // (using Stewart's term) there is an "orthogonalization fault"
01104       // (indicated by reorthogFault).
01105       //
01106       // NOTE (mfh 07 Nov 2010) For now, we include the entire block
01107       // of X, including possible random data (that was already
01108       // projected and normalized above).  It might make more sense
01109       // just to process the first firstPassRank columns of X.
01110       // However, the resulting reorthogonalization should still be
01111       // correct regardless.
01112       bool reorthogFault = false;
01113       // Indices of X at which there was an orthogonalization fault.
01114       std::vector<int> faultIndices;
01115       if (reorthogonalize) {
01116         using Teuchos::Copy;
01117         using Teuchos::NO_TRANS;
01118 
01119         // If we're using out-of-place normalization, copy X_out
01120         // (results of first project and normalize pass) back into
01121         // X_in, for the second project and normalize pass.
01122         if (outOfPlace)
01123           MVT::Assign (X_out, X_in);
01124 
01125         // C2 is only used internally, so we know that we are
01126         // allocating fresh and not recycling allocations.  Stating
01127         // this lets us save time checking dimensions.
01128         Teuchos::Array<mat_ptr> C2;
01129         allocateProjectionCoefficients (C2, Q, X_in, false);
01130 
01131         // Block Gram-Schmidt (again).  Delay updating the block
01132         // coefficients until we have the new normalization
01133         // coefficients, which we need in order to do the update.
01134         rawProject (X_in, Q, C2);
01135             
01136         // Coefficients for (re)normalization of X_in.
01137         RCP<mat_type> B2 (new mat_type (ncols_X, ncols_X));
01138 
01139         // Normalize X_in (into X_out, if working out of place).
01140         const int secondPassRank = outOfPlace ? 
01141           normalizeOutOfPlace (X_in, X_out, B2) : 
01142           normalize (X_in, B2);
01143         rank = secondPassRank; // Current rank of X
01144 
01145         // Update normalization coefficients.  We begin with copying
01146         // B_out, since the BLAS' _GEMM routine doesn't let us alias
01147         // its input and output arguments.
01148         mat_type B_copy (Copy, *B_out, B_out->numRows(), B_out->numCols());
01149         // B_out := B2 * B_out (where input B_out is in B_copy).
01150         const int err = B_out->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01151                                          *B2, B_copy, SCT::zero());
01152         TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::logic_error, 
01153                            "Teuchos::SerialDenseMatrix::multiply "
01154                            "returned err = " << err << " != 0");
01155         // Update the block coefficients from the projection step.  We
01156         // use B_copy for this (a copy of B_out, the first-pass
01157         // normalization coefficients).
01158         for (int k = 0; k < num_Q_blocks; ++k) {
01159           mat_type C_k_copy (Copy, *C[k], C[k]->numRows(), C[k]->numCols());
01160 
01161           // C[k] := C2[k]*B_copy + C[k].
01162           const int err2 = C[k]->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01163                                           *C2[k], B_copy, SCT::one());
01164           TEUCHOS_TEST_FOR_EXCEPTION(err2 != 0, std::logic_error, 
01165                              "Teuchos::SerialDenseMatrix::multiply "
01166                              "returned err = " << err << " != 0");
01167         }
01168         // Compute post-second-pass (pre-normalization) norms, using
01169         // B2 (the coefficients from the second normalization step) in
01170         // the same way as with B_out before.
01171         for (int j = 0; j < rank; ++j) {
01172           const Scalar* const B2_j = &(*B2)(0,j);
01173           normsAfterSecondPass[j] = blas.NRM2 (rank, B2_j, 1);
01174         }
01175         // Test whether any of the norms dropped below the
01176         // reorthogonalization threshold.  If so, it's an
01177         // orthogonalization fault, which requires expensive recovery.
01178         reorthogFault = false;
01179         for (int j = 0; j < rank; ++j) {
01180           const magnitude_type relativeLowerBound = 
01181             blockReorthogThreshold() * normsAfterFirstPass[j];
01182           if (normsAfterSecondPass[j] < relativeLowerBound) {
01183             reorthogFault = true; 
01184             faultIndices.push_back (j);
01185           }
01186         }
01187       } // if (reorthogonalize) // reorthogonalization pass
01188 
01189       if (reorthogFault) {
01190         if (throwOnReorthogFault_) {
01191           raiseReorthogFault (normsAfterFirstPass, 
01192                               normsAfterSecondPass, 
01193                               faultIndices);
01194         } else {
01195           // NOTE (mfh 19 Jan 2011) We could handle the fault here by
01196           // slowly reorthogonalizing, one vector at a time, the
01197           // offending vectors of X.  However, we choose not to
01198           // implement this for now.  If it becomes a problem, let us
01199           // know and we will prioritize implementing this.
01200           TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 
01201                              "TsqrOrthoManagerImpl has not yet implemented"
01202                              " recovery from an orthogonalization fault.");
01203         }
01204       }
01205     } // if (reorthogonalizeBlocks_)
01206     return rank;
01207   }
01208 
01209 
01210   template<class Scalar, class MV>
01211   void
01212   TsqrOrthoManagerImpl<Scalar, MV>::
01213   raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
01214                       const std::vector<magnitude_type>& normsAfterSecondPass,
01215                       const std::vector<int>& faultIndices)
01216   {
01217     using std::endl;
01218     typedef std::vector<int>::size_type size_type;
01219     std::ostringstream os;
01220                   
01221     os << "Orthogonalization fault at the following column(s) of X:" << endl;
01222     os << "Column\tNorm decrease factor" << endl;
01223     for (size_type k = 0; k < faultIndices.size(); ++k)
01224       {
01225         const int index = faultIndices[k];
01226         const magnitude_type decreaseFactor = 
01227           normsAfterSecondPass[index] / normsAfterFirstPass[index];
01228         os << index << "\t" << decreaseFactor << endl;
01229       }
01230     throw TsqrOrthoFault (os.str());
01231   }
01232 
01233   template<class Scalar, class MV>
01234   Teuchos::RCP<const Teuchos::ParameterList>
01235   TsqrOrthoManagerImpl<Scalar, MV>::getValidParameters () const
01236   {
01237     using Teuchos::ParameterList;
01238     using Teuchos::parameterList;
01239     using Teuchos::RCP;
01240 
01241     if (defaultParams_.is_null()) {
01242       RCP<ParameterList> params = parameterList ("TsqrOrthoManagerImpl");
01243       //
01244       // TSQR parameters (set as a sublist).
01245       //
01246       params->set ("TSQR implementation", *(tsqrAdaptor_.getValidParameters()),
01247                    "TSQR implementation parameters.");
01248       // 
01249       // Orthogonalization parameters
01250       //
01251       const bool defaultRandomizeNullSpace = true;
01252       params->set ("randomizeNullSpace", defaultRandomizeNullSpace, 
01253                    "Whether to fill in null space vectors with random data.");
01254 
01255       const bool defaultReorthogonalizeBlocks = true;
01256       params->set ("reorthogonalizeBlocks", defaultReorthogonalizeBlocks,
01257                    "Whether to do block reorthogonalization as necessary.");
01258 
01259       // This parameter corresponds to the "blk_tol_" parameter in
01260       // Belos' DGKSOrthoManager.  We choose the same default value.
01261       const magnitude_type defaultBlockReorthogThreshold = 
01262         magnitude_type(10) * SCTM::squareroot (SCTM::eps());
01263       params->set ("blockReorthogThreshold", defaultBlockReorthogThreshold, 
01264                    "If reorthogonalizeBlocks==true, and if the norm of "
01265                    "any column within a block decreases by this much or "
01266                    "more after orthogonalization, we reorthogonalize.");
01267 
01268       // This parameter corresponds to the "sing_tol_" parameter in
01269       // Belos' DGKSOrthoManager.  We choose the same default value.
01270       const magnitude_type defaultRelativeRankTolerance = 
01271         Teuchos::as<magnitude_type>(10) * SCTM::eps();
01272 
01273       // If the relative rank tolerance is zero, then we will always
01274       // declare blocks to be numerically full rank, as long as no
01275       // singular values are zero.
01276       params->set ("relativeRankTolerance", defaultRelativeRankTolerance,
01277                    "Relative tolerance to determine the numerical rank of a "
01278                    "block when normalizing.");
01279 
01280       // See Stewart's 2008 paper on block Gram-Schmidt for a definition
01281       // of "orthogonalization fault."
01282       const bool defaultThrowOnReorthogFault = true;
01283       params->set ("throwOnReorthogFault", defaultThrowOnReorthogFault,
01284                    "Whether to throw an exception if an orthogonalization "
01285                    "fault occurs.  This only matters if reorthogonalization "
01286                    "is enabled (reorthogonalizeBlocks==true).");
01287 
01288       const bool defaultForceNonnegativeDiagonal = false;
01289       params->set ("forceNonnegativeDiagonal", defaultForceNonnegativeDiagonal,
01290                    "Whether to force the R factor produced by the normalization "
01291                    "step to have a nonnegative diagonal.");
01292 
01293       defaultParams_ = params;
01294     }
01295     return defaultParams_;
01296   }
01297 
01298   template<class Scalar, class MV>
01299   Teuchos::RCP<const Teuchos::ParameterList>
01300   TsqrOrthoManagerImpl<Scalar, MV>::getFastParameters ()
01301   {
01302     using Teuchos::ParameterList;
01303     using Teuchos::RCP;
01304     using Teuchos::rcp;
01305 
01306     RCP<const ParameterList> defaultParams = getValidParameters();
01307     // Start with a clone of the default parameters.
01308     RCP<ParameterList> params = rcp (new ParameterList (*defaultParams));
01309         
01310     // Disable reorthogonalization and randomization of the null
01311     // space basis.  Reorthogonalization tolerances don't matter,
01312     // since we aren't reorthogonalizing blocks in the fast
01313     // settings.  We can leave the default values.  Also,
01314     // (re)orthogonalization faults may only occur with
01315     // reorthogonalization, so we don't have to worry about the
01316     // "throwOnReorthogFault" setting.
01317     const bool randomizeNullSpace = false;
01318     params->set ("randomizeNullSpace", randomizeNullSpace);      
01319     const bool reorthogonalizeBlocks = false;
01320     params->set ("reorthogonalizeBlocks", reorthogonalizeBlocks);
01321 
01322     return params;
01323   }
01324 
01325   template<class Scalar, class MV>
01326   int
01327   TsqrOrthoManagerImpl<Scalar, MV>::
01328   rawNormalize (MV& X, 
01329                 MV& Q, 
01330                 Teuchos::SerialDenseMatrix<int, Scalar>& B)
01331   {
01332     int rank;
01333     try {
01334       // This call only computes the QR factorization X = Q B.
01335       // It doesn't compute the rank of X.  That comes from
01336       // revealRank() below.
01337       tsqrAdaptor_.factorExplicit (X, Q, B, forceNonnegativeDiagonal_);
01338       // This call will only modify *B if *B on input is not of full
01339       // numerical rank.
01340       rank = tsqrAdaptor_.revealRank (Q, B, relativeRankTolerance_);
01341     } catch (std::exception& e) {
01342       throw TsqrOrthoError (e.what()); // Toss the exception up the chain.
01343     }
01344     return rank;
01345   }
01346 
01347   template<class Scalar, class MV>
01348   int
01349   TsqrOrthoManagerImpl<Scalar, MV>::
01350   normalizeOne (MV& X, 
01351                 Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B) const
01352   {
01353     // Make space for the normalization coefficient.  This will either
01354     // be a freshly allocated matrix (if B is null), or a view of the
01355     // 1x1 upper left submatrix of *B (if B is not null).
01356     mat_ptr B_out;
01357     if (B.is_null()) {
01358       B_out = rcp (new mat_type (1, 1));
01359     } else {
01360       const int numRows = B->numRows();
01361       const int numCols = B->numCols();
01362       TEUCHOS_TEST_FOR_EXCEPTION(numRows < 1 || numCols < 1, 
01363                          std::invalid_argument,
01364                          "normalizeOne: Input matrix B must be at "
01365                          "least 1 x 1, but is instead " << numRows
01366                          << " x " << numCols << ".");
01367       // Create a view of the 1x1 upper left submatrix of *B.
01368       B_out = rcp (new mat_type (Teuchos::View, *B, 1, 1));
01369     }
01370 
01371     // Compute the norm of X, and write the result to B_out.
01372     std::vector<magnitude_type> theNorm (1, SCTM::zero());
01373     MVT::MvNorm (X, theNorm);
01374     (*B_out)(0,0) = theNorm[0];
01375 
01376     if (B.is_null()) {
01377       // The input matrix B is null, so assign B_out to it.  If B was
01378       // not null on input, then B_out is a view of *B, so we don't
01379       // have to do anything here.  Note that SerialDenseMatrix uses
01380       // raw pointers to store data and represent views, so we have to
01381       // be careful about scope.
01382       B = B_out;
01383     }
01384 
01385     // Scale X by its norm, if its norm is zero.  Otherwise, do the
01386     // right thing based on whether the user wants us to fill the null
01387     // space with random vectors.
01388     if (theNorm[0] == SCTM::zero()) {
01389       // Make a view of the first column of Q, fill it with random
01390       // data, and normalize it.  Throw away the resulting norm.
01391       if (randomizeNullSpace_) {
01392         MVT::MvRandom(X);
01393         MVT::MvNorm (X, theNorm);
01394         if (theNorm[0] == SCTM::zero()) {
01395           // It is possible that a random vector could have all zero
01396           // entries, but unlikely.  We could try again, but it's also
01397           // possible that multiple tries could result in zero
01398           // vectors.  We choose instead to give up.
01399           throw TsqrOrthoError("normalizeOne: a supposedly random "
01400                                "vector has norm zero!");
01401         } else {
01402           // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a
01403           // Scalar by a magnitude_type is defined and that it results
01404           // in a Scalar.
01405           const Scalar alpha = SCT::one() / theNorm[0];
01406           MVT::MvScale (X, alpha);
01407         }
01408       }
01409       return 0; // The rank of the matrix (actually just one vector) X.
01410     } else {
01411       // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a Scalar by
01412       // a magnitude_type is defined and that it results in a Scalar.
01413       const Scalar alpha = SCT::one() / theNorm[0];
01414       MVT::MvScale (X, alpha);
01415       return 1; // The rank of the matrix (actually just one vector) X.
01416     }
01417   }
01418 
01419 
01420   template<class Scalar, class MV>
01421   void
01422   TsqrOrthoManagerImpl<Scalar, MV>::
01423   rawProject (MV& X, 
01424               Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01425               Teuchos::ArrayView<Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > > C) const
01426   {
01427     // "Modified Gram-Schmidt" version of Block Gram-Schmidt.
01428     const int num_Q_blocks = Q.size();
01429     for (int i = 0; i < num_Q_blocks; ++i)
01430       {
01431         // TEUCHOS_TEST_FOR_EXCEPTION(C[i].is_null(), std::logic_error,
01432         //                    "TsqrOrthoManagerImpl::rawProject(): C[" 
01433         //                    << i << "] is null");
01434         // TEUCHOS_TEST_FOR_EXCEPTION(Q[i].is_null(), std::logic_error,
01435         //                    "TsqrOrthoManagerImpl::rawProject(): Q[" 
01436         //                    << i << "] is null");
01437         mat_type& Ci = *C[i];
01438         const MV& Qi = *Q[i];
01439         innerProd (Qi, X, Ci);
01440         MVT::MvTimesMatAddMv (-SCT::one(), Qi, Ci, SCT::one(), X);
01441       }
01442   }
01443 
01444 
01445   template<class Scalar, class MV>
01446   void
01447   TsqrOrthoManagerImpl<Scalar, MV>::
01448   rawProject (MV& X, 
01449               const Teuchos::RCP<const MV>& Q, 
01450               const Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> >& C) const
01451   {
01452     // Block Gram-Schmidt
01453     innerProd (*Q, X, *C);
01454     MVT::MvTimesMatAddMv (-SCT::one(), *Q, *C, SCT::one(), X);
01455   }
01456 
01457   template<class Scalar, class MV>
01458   int
01459   TsqrOrthoManagerImpl<Scalar, MV>::
01460   normalizeImpl (MV& X, 
01461                  MV& Q, 
01462                  Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B, 
01463                  const bool outOfPlace)
01464   {
01465     using Teuchos::Range1D;
01466     using Teuchos::RCP;
01467     using Teuchos::rcp;
01468     using Teuchos::ScalarTraits;
01469     using Teuchos::tuple;
01470     using std::cerr;
01471     using std::endl;
01472     // Don't set this to true unless you want lots of debugging
01473     // messages written to stderr on every MPI process.
01474     const bool extraDebug = false;
01475 
01476     const int numCols = MVT::GetNumberVecs (X);
01477     if (numCols == 0) {
01478       return 0; // Fast exit for an empty input matrix.
01479     }
01480 
01481     // We allow Q to have more columns than X.  In that case, we only
01482     // touch the first numCols columns of Q.
01483     TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(Q) < numCols, 
01484                        std::invalid_argument, 
01485                        "TsqrOrthoManagerImpl::normalizeImpl(X,Q,B): Q has "
01486                        << MVT::GetNumberVecs(Q) << " columns.  This is too "
01487                        "few, since X has " << numCols << " columns.");
01488     // TSQR wants a Q with the same number of columns as X, so have it
01489     // work on a nonconstant view of Q with the same number of columns
01490     // as X.
01491     RCP<MV> Q_view = MVT::CloneViewNonConst (Q, Range1D(0, numCols-1));
01492 
01493     // Make space for the normalization coefficients.  This will
01494     // either be a freshly allocated matrix (if B is null), or a view
01495     // of the appropriately sized upper left submatrix of *B (if B is
01496     // not null).
01497     mat_ptr B_out;
01498     if (B.is_null()) {
01499       B_out = rcp (new mat_type (numCols, numCols));
01500     } else {
01501       // Make sure that B is no smaller than numCols x numCols.
01502       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < numCols || B->numCols() < numCols,
01503                          std::invalid_argument,
01504                          "normalizeOne: Input matrix B must be at "
01505                          "least " << numCols << " x " << numCols 
01506                          << ", but is instead " << B->numRows()
01507                          << " x " << B->numCols() << ".");
01508       // Create a view of the numCols x numCols upper left submatrix
01509       // of *B.  TSQR will write the normalization coefficients there.
01510       B_out = rcp (new mat_type (Teuchos::View, *B, numCols, numCols));
01511     }
01512 
01513     if (extraDebug) {
01514       std::vector<magnitude_type> norms (numCols);
01515       MVT::MvNorm (X, norms);
01516       cerr << "Column norms of X before orthogonalization: ";
01517       typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01518       for (iter_type iter = norms.begin(); iter != norms.end(); ++iter) {
01519         cerr << *iter;
01520         if (iter+1 != norms.end())
01521           cerr << ", ";
01522       }
01523     }
01524 
01525     // Compute rank-revealing decomposition (in this case, TSQR of X
01526     // followed by SVD of the R factor and appropriate updating of the
01527     // resulting Q factor) of X.  X is modified in place and filled
01528     // with garbage, and Q_view contains the resulting explicit Q
01529     // factor.  Later, we will copy this back into X.
01530     //
01531     // The matrix *B_out will only be upper triangular if X is of full
01532     // numerical rank.  Otherwise, the entries below the diagonal may
01533     // be filled in as well.
01534     const int rank = rawNormalize (X, *Q_view, *B_out);
01535     if (B.is_null()) {
01536       // The input matrix B is null, so assign B_out to it.  If B was
01537       // not null on input, then B_out is a view of *B, so we don't
01538       // have to do anything here.  Note that SerialDenseMatrix uses
01539       // raw pointers to store data and represent views, so we have to
01540       // be careful about scope.
01541       B = B_out;
01542     }
01543 
01544     if (extraDebug) {
01545       std::vector<magnitude_type> norms (numCols);
01546       MVT::MvNorm (*Q_view, norms);
01547       cerr << "Column norms of Q_view after orthogonalization: ";
01548       for (typename std::vector<magnitude_type>::const_iterator iter = norms.begin(); 
01549            iter != norms.end(); ++iter) {
01550         cerr << *iter;
01551         if (iter+1 != norms.end())
01552           cerr << ", ";
01553       }
01554     }
01555     TEUCHOS_TEST_FOR_EXCEPTION(rank < 0 || rank > numCols, std::logic_error,
01556                        "Belos::TsqrOrthoManagerImpl::normalizeImpl: "
01557                        "rawNormalize() returned rank = " << rank << " for a "
01558                        "matrix X with " << numCols << " columns.  Please report"
01559                        " this bug to the Belos developers.");
01560     if (extraDebug && rank == 0) {
01561       // Sanity check: ensure that the columns of X are sufficiently
01562       // small for X to be reported as rank zero.
01563       const mat_type& B_ref = *B;
01564       std::vector<magnitude_type> norms (B_ref.numCols());
01565       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01566         typedef typename mat_type::scalarType mat_scalar_type;
01567         mat_scalar_type sumOfSquares = ScalarTraits<mat_scalar_type>::zero();
01568         for (typename mat_type::ordinalType i = 0; i <= j; ++i) {
01569           const mat_scalar_type B_ij = 
01570             ScalarTraits<mat_scalar_type>::magnitude (B_ref(i,j));
01571           sumOfSquares += B_ij*B_ij;
01572         }
01573         norms[j] = ScalarTraits<mat_scalar_type>::squareroot (sumOfSquares);
01574       }
01575       using std::cerr;
01576       using std::endl;
01577       cerr << "Norms of columns of B after orthogonalization: ";
01578       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01579         cerr << norms[j];
01580         if (j != B_ref.numCols() - 1)
01581           cerr << ", ";
01582       }
01583       cerr << endl;
01584     }
01585 
01586     // If X is full rank or we don't want to replace its null space
01587     // basis with random vectors, then we're done.
01588     if (rank == numCols || ! randomizeNullSpace_) {
01589       // If we're supposed to be working in place in X, copy the
01590       // results back from Q_view into X.
01591       if (! outOfPlace) {
01592         MVT::Assign (*Q_view, X);
01593       }
01594       return rank;
01595     }
01596 
01597     if (randomizeNullSpace_ && rank < numCols) {
01598       // X wasn't full rank.  Replace the null space basis of X (in
01599       // the last numCols-rank columns of Q_view) with random data,
01600       // project it against the first rank columns of Q_view, and
01601       // normalize.
01602       //
01603       // Number of columns to fill with random data.
01604       const int nullSpaceNumCols = numCols - rank;
01605       // Inclusive range of indices of columns of X to fill with
01606       // random data.
01607       Range1D nullSpaceIndices (rank, numCols-1);
01608 
01609       // rawNormalize() wrote the null space basis vectors into
01610       // Q_view.  We continue to work in place in Q_view by writing
01611       // the random data there and (if there is a nontrival column
01612       // space) projecting in place against the column space basis
01613       // vectors (also in Q_view).
01614       RCP<MV> Q_null = MVT::CloneViewNonConst (*Q_view, nullSpaceIndices);
01615       // Replace the null space basis with random data.
01616       MVT::MvRandom (*Q_null); 
01617 
01618       // Make sure that the "random" data isn't all zeros.  This is
01619       // statistically nearly impossible, but we test for debugging
01620       // purposes.
01621       {
01622         std::vector<magnitude_type> norms (MVT::GetNumberVecs(*Q_null));
01623         MVT::MvNorm (*Q_null, norms);
01624 
01625         bool anyZero = false;
01626         typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01627         for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01628           if (*it == SCTM::zero()) {
01629             anyZero = true;
01630           }
01631         }
01632         if (anyZero) {
01633           std::ostringstream os;
01634           os << "TsqrOrthoManagerImpl::normalizeImpl: "
01635             "We are being asked to randomize the null space, for a matrix "
01636             "with " << numCols << " columns and reported column rank "
01637              << rank << ".  The inclusive range of columns to fill with "
01638             "random data is [" << nullSpaceIndices.lbound() << "," 
01639              << nullSpaceIndices.ubound() << "].  After filling the null "
01640             "space vectors with random numbers, at least one of the vectors"
01641             " has norm zero.  Here are the norms of all the null space "
01642             "vectors: [";
01643           for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01644             os << *it;
01645             if (it+1 != norms.end())
01646               os << ", ";
01647           }
01648           os << "].)  There is a tiny probability that this could happen "
01649             "randomly, but it is likely a bug.  Please report it to the "
01650             "Belos developers, especially if you are able to reproduce the "
01651             "behavior.";
01652           TEUCHOS_TEST_FOR_EXCEPTION(anyZero, TsqrOrthoError, os.str());
01653         }
01654       }
01655 
01656       if (rank > 0) {
01657         // Project the random data against the column space basis of
01658         // X, using a simple block projection ("Block Classical
01659         // Gram-Schmidt").  This is accurate because we've already
01660         // orthogonalized the column space basis of X nearly to
01661         // machine precision via a QR factorization (TSQR) with
01662         // accuracy comparable to Householder QR.
01663         RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01664 
01665         // Temporary storage for projection coefficients.  We don't
01666         // need to keep them, since they represent the null space
01667         // basis (for which the coefficients are logically zero).
01668         mat_ptr C_null (new mat_type (rank, nullSpaceNumCols));
01669         rawProject (*Q_null, Q_col, C_null);
01670       }
01671       // Normalize the projected random vectors, so that they are
01672       // mutually orthogonal (as well as orthogonal to the column
01673       // space basis of X).  We use X for the output of the
01674       // normalization: for out-of-place normalization (outOfPlace ==
01675       // true), X is overwritten with "invalid values" anyway, and for
01676       // in-place normalization (outOfPlace == false), we want the
01677       // result to be in X anyway.
01678       RCP<MV> X_null = MVT::CloneViewNonConst (X, nullSpaceIndices);
01679       // Normalization coefficients for projected random vectors.
01680       // Will be thrown away.
01681       mat_type B_null (nullSpaceNumCols, nullSpaceNumCols);
01682       // Write the normalized vectors to X_null (in X).
01683       const int nullSpaceBasisRank = rawNormalize (*Q_null, *X_null, B_null);
01684           
01685       // It's possible, but unlikely, that X_null doesn't have full
01686       // rank (after the projection step).  We could recursively fill
01687       // in more random vectors until we finally get a full rank
01688       // matrix, but instead we just throw an exception.
01689       //
01690       // NOTE (mfh 08 Nov 2010) Perhaps we should deal with this case
01691       // more elegantly.  Recursion might be one way to solve it, but
01692       // be sure to check that the recursion will terminate.  We could
01693       // do this by supplying an additional argument to rawNormalize,
01694       // which is the null space basis rank from the previous
01695       // iteration.  The rank has to decrease each time, or the
01696       // recursion may go on forever.
01697       if (nullSpaceBasisRank < nullSpaceNumCols) {
01698         std::vector<magnitude_type> norms (MVT::GetNumberVecs(*X_null));
01699         MVT::MvNorm (*X_null, norms);
01700         std::ostringstream os;
01701         os << "TsqrOrthoManagerImpl::normalizeImpl: "
01702            << "We are being asked to randomize the null space, "
01703            << "for a matrix with " << numCols << " columns and "
01704            << "column rank " << rank << ".  After projecting and "
01705            << "normalizing the generated random vectors, they "
01706            << "only have rank " << nullSpaceBasisRank << ".  They"
01707            << " should have full rank " << nullSpaceNumCols 
01708            << ".  (The inclusive range of columns to fill with "
01709            << "random data is [" << nullSpaceIndices.lbound() 
01710            << "," << nullSpaceIndices.ubound() << "].  The "
01711            << "column norms of the resulting Q factor are: [";
01712         for (typename std::vector<magnitude_type>::size_type k = 0; 
01713              k < norms.size(); ++k) {
01714           os << norms[k];
01715           if (k != norms.size()-1) {
01716             os << ", ";
01717           }
01718         }
01719         os << "].)  There is a tiny probability that this could "
01720            << "happen randomly, but it is likely a bug.  Please "
01721            << "report it to the Belos developers, especially if "
01722            << "you are able to reproduce the behavior.";
01723 
01724         TEUCHOS_TEST_FOR_EXCEPTION(nullSpaceBasisRank < nullSpaceNumCols, 
01725                            TsqrOrthoError, os.str());
01726       }
01727       // If we're normalizing out of place, copy the X_null
01728       // vectors back into Q_null; the Q_col vectors are already
01729       // where they are supposed to be in that case.
01730       //
01731       // If we're normalizing in place, leave X_null alone (it's
01732       // already where it needs to be, in X), but copy Q_col back
01733       // into the first rank columns of X.
01734       if (outOfPlace) {
01735         MVT::Assign (*X_null, *Q_null);
01736       } else if (rank > 0) {
01737         // MVT::Assign() doesn't accept empty ranges of columns.
01738         RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01739         RCP<MV> X_col = MVT::CloneViewNonConst (X, Range1D(0, rank-1));
01740         MVT::Assign (*Q_col, *X_col);
01741       }
01742     }
01743     return rank;
01744   }
01745 
01746 
01747   template<class Scalar, class MV>
01748   void
01749   TsqrOrthoManagerImpl<Scalar, MV>::
01750   checkProjectionDims (int& ncols_X, 
01751                        int& num_Q_blocks,
01752                        int& ncols_Q_total,
01753                        const MV& X, 
01754                        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const
01755   {
01756     // First assign to temporary values, so the function won't
01757     // commit any externally visible side effects unless it will
01758     // return normally (without throwing an exception).  (I'm being
01759     // cautious; MVT::GetNumberVecs() probably shouldn't have any
01760     // externally visible side effects, unless it is logging to a
01761     // file or something.)
01762     int the_ncols_X, the_num_Q_blocks, the_ncols_Q_total;
01763     the_num_Q_blocks = Q.size();
01764     the_ncols_X = MVT::GetNumberVecs (X);
01765 
01766     // Compute the total number of columns of all the Q[i] blocks.
01767     the_ncols_Q_total = 0;
01768     // You should be angry if your compiler doesn't support type
01769     // inference ("auto").  That's why I need this awful typedef.
01770     using Teuchos::ArrayView;
01771     using Teuchos::RCP;
01772     typedef typename ArrayView<RCP<const MV> >::const_iterator iter_type;
01773     for (iter_type it = Q.begin(); it != Q.end(); ++it) {
01774       const MV& Qi = **it;
01775       the_ncols_Q_total += MVT::GetNumberVecs (Qi);
01776     }
01777 
01778     // Commit temporary values to the output arguments.
01779     ncols_X = the_ncols_X;
01780     num_Q_blocks = the_num_Q_blocks;
01781     ncols_Q_total = the_ncols_Q_total;
01782   }
01783 
01784 } // namespace Anasazi
01785 
01786 #endif // __AnasaziTsqrOrthoManagerImpl_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends