Belos Package Browser (Single Doxygen Collection) Development
BelosTsqrOrthoManagerImpl.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                  Copyright 2004 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00045 #ifndef __BelosTsqrOrthoManagerImpl_hpp
00046 #define __BelosTsqrOrthoManagerImpl_hpp
00047 
00048 #include "BelosConfigDefs.hpp" // HAVE_BELOS_TSQR
00049 #include "BelosMultiVecTraits.hpp"
00050 #include "BelosOrthoManager.hpp" // OrthoError, etc.
00051 
00052 #include "Teuchos_as.hpp"
00053 #include "Teuchos_LAPACK.hpp"
00054 #include "Teuchos_ParameterList.hpp"
00055 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
00056 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00057 #  include "Teuchos_TimeMonitor.hpp"
00058 #endif // BELOS_TEUCHOS_TIME_MONITOR
00059 #include <algorithm>
00060 #include <functional> // std::binary_function
00061 
00062 namespace Belos {
00063 
00067   class TsqrOrthoError : public OrthoError {
00068   public:
00069     TsqrOrthoError (const std::string& what_arg) :
00070       OrthoError (what_arg) {}
00071   };
00072 
00092   class TsqrOrthoFault : public OrthoError {
00093   public:
00094     TsqrOrthoFault (const std::string& what_arg) :
00095       OrthoError (what_arg) {}
00096   };
00097 
00130   template<class Scalar>
00131   class ReorthogonalizationCallback :
00132     public std::binary_function<Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>,
00133                                 Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>,
00134                                 void>
00135   {
00136   public:
00138     typedef Scalar scalar_type;
00143     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00144 
00146     virtual ~ReorthogonalizationCallback ();
00147 
00152     virtual void
00153     operator() (Teuchos::ArrayView<magnitude_type> normsBeforeFirstPass,
00154                 Teuchos::ArrayView<magnitude_type> normsAfterFirstPass) = 0;
00155   };
00156 
00157   template<class Scalar>
00158   ReorthogonalizationCallback<Scalar>::~ReorthogonalizationCallback () {}
00159 
00191   template<class Scalar, class MV>
00192   class TsqrOrthoManagerImpl :
00193     public Teuchos::ParameterListAcceptorDefaultBase {
00194   public:
00195     typedef Scalar scalar_type;
00196     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00197     typedef MV multivector_type;
00199     typedef Teuchos::SerialDenseMatrix<int, Scalar> mat_type;
00200     typedef Teuchos::RCP<mat_type> mat_ptr;
00201 
00202   private:
00203     typedef Teuchos::ScalarTraits<Scalar> SCT;
00204     typedef Teuchos::ScalarTraits<magnitude_type> SCTM;
00205     typedef MultiVecTraits<Scalar, MV> MVT;
00206     typedef MultiVecTraitsExt<Scalar, MV> MVText;
00207     typedef typename MVT::tsqr_adaptor_type tsqr_adaptor_type;
00208 
00209   public:
00217     Teuchos::RCP<const Teuchos::ParameterList> getValidParameters () const;
00218 
00220     void setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params);
00221 
00232     Teuchos::RCP<const Teuchos::ParameterList> getFastParameters ();
00233 
00250     TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00251                           const std::string& label);
00252 
00257     TsqrOrthoManagerImpl (const std::string& label);
00258 
00278     void
00279     setReorthogonalizationCallback (const Teuchos::RCP<ReorthogonalizationCallback<Scalar> >& callback)
00280     {
00281       reorthogCallback_ = callback;
00282     }
00283 
00291     void setLabel (const std::string& label) {
00292       if (label != label_) {
00293         label_ = label;
00294 
00295 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00296         clearTimer (label, "All orthogonalization");
00297         clearTimer (label, "Projection");
00298         clearTimer (label, "Normalization");
00299 
00300         timerOrtho_ = makeTimer (label, "All orthogonalization");
00301         timerProject_ = makeTimer (label, "Projection");
00302         timerNormalize_ = makeTimer (label, "Normalization");
00303 #endif // BELOS_TEUCHOS_TIME_MONITOR
00304       }
00305     }
00306 
00308     const std::string& getLabel () const { return label_; }
00309 
00318     void
00319     innerProd (const MV& X, const MV& Y, mat_type& Z) const
00320     {
00321       MVT::MvTransMv (SCT::one(), X, Y, Z);
00322     }
00323 
00341     void
00342     norm (const MV& X, std::vector<magnitude_type>& normVec) const;
00343 
00353     void
00354     project (MV& X,
00355              Teuchos::Array<mat_ptr> C,
00356              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00357 
00371     int normalize (MV& X, mat_ptr B);
00372 
00391     int
00392     normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B);
00393 
00407     int
00408     projectAndNormalize (MV &X,
00409                          Teuchos::Array<mat_ptr> C,
00410                          mat_ptr B,
00411                          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00412     {
00413       // "false" means we work on X in place.  The second argument is
00414       // not read or written in that case.
00415       return projectAndNormalizeImpl (X, X, false, C, B, Q);
00416     }
00417 
00437     int
00438     projectAndNormalizeOutOfPlace (MV& X_in,
00439                                    MV& X_out,
00440                                    Teuchos::Array<mat_ptr> C,
00441                                    mat_ptr B,
00442                                    Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00443     {
00444       // "true" means we work on X_in out of place, writing the
00445       // results into X_out.
00446       return projectAndNormalizeImpl (X_in, X_out, true, C, B, Q);
00447     }
00448 
00453     magnitude_type
00454     orthonormError (const MV &X) const
00455     {
00456       const Scalar ONE = SCT::one();
00457       const int ncols = MVT::GetNumberVecs(X);
00458       mat_type XTX (ncols, ncols);
00459       innerProd (X, X, XTX);
00460       for (int k = 0; k < ncols; ++k) {
00461         XTX(k,k) -= ONE;
00462       }
00463       return XTX.normFrobenius();
00464     }
00465 
00467     magnitude_type
00468     orthogError (const MV &X1,
00469                  const MV &X2) const
00470     {
00471       const int ncols_X1 = MVT::GetNumberVecs (X1);
00472       const int ncols_X2 = MVT::GetNumberVecs (X2);
00473       mat_type X1_T_X2 (ncols_X1, ncols_X2);
00474       innerProd (X1, X2, X1_T_X2);
00475       return X1_T_X2.normFrobenius();
00476     }
00477 
00481     magnitude_type blockReorthogThreshold() const { return blockReorthogThreshold_; }
00482 
00485     magnitude_type relativeRankTolerance() const { return relativeRankTolerance_; }
00486 
00487   private:
00489     Teuchos::RCP<Teuchos::ParameterList> params_;
00490 
00492     mutable Teuchos::RCP<const Teuchos::ParameterList> defaultParams_;
00493 
00495     std::string label_;
00496 
00498     tsqr_adaptor_type tsqrAdaptor_;
00499 
00509     Teuchos::RCP<MV> Q_;
00510 
00512     magnitude_type eps_;
00513 
00517     bool randomizeNullSpace_;
00518 
00524     bool reorthogonalizeBlocks_;
00525 
00529     bool throwOnReorthogFault_;
00530 
00532     magnitude_type blockReorthogThreshold_;
00533 
00535     magnitude_type relativeRankTolerance_;
00536 
00543     bool forceNonnegativeDiagonal_;
00544 
00545 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00546 
00547     Teuchos::RCP<Teuchos::Time> timerOrtho_;
00548 
00550     Teuchos::RCP<Teuchos::Time> timerProject_;
00551 
00553     Teuchos::RCP<Teuchos::Time> timerNormalize_;
00554 #endif // BELOS_TEUCHOS_TIME_MONITOR
00555 
00557     Teuchos::RCP<ReorthogonalizationCallback<Scalar> > reorthogCallback_;
00558 
00559 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00560 
00561 
00562 
00563 
00564 
00565 
00566 
00567 
00568     static Teuchos::RCP<Teuchos::Time>
00569     makeTimer (const std::string& prefix,
00570                const std::string& timerName)
00571     {
00572       const std::string timerLabel =
00573         prefix.empty() ? timerName : (prefix + ": " + timerName);
00574       return Teuchos::TimeMonitor::getNewCounter (timerLabel);
00575     }
00576 
00582     void
00583     clearTimer (const std::string& prefix,
00584                 const std::string& timerName)
00585     {
00586       const std::string timerLabel =
00587         prefix.empty() ? timerName : (prefix + ": " + timerName);
00588       Teuchos::TimeMonitor::clearCounter (timerLabel);
00589     }
00590 #endif // BELOS_TEUCHOS_TIME_MONITOR
00591 
00593     void
00594     raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
00595                         const std::vector<magnitude_type>& normsAfterSecondPass,
00596                         const std::vector<int>& faultIndices);
00597 
00607     void
00608     checkProjectionDims (int& ncols_X,
00609                          int& num_Q_blocks,
00610                          int& ncols_Q_total,
00611                          const MV& X,
00612                          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const;
00613 
00624     void
00625     allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C,
00626                                     Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00627                                     const MV& X,
00628                                     const bool attemptToRecycle = true) const;
00629 
00638     int
00639     projectAndNormalizeImpl (MV& X_in,
00640                              MV& X_out,
00641                              const bool outOfPlace,
00642                              Teuchos::Array<mat_ptr> C,
00643                              mat_ptr B,
00644                              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00645 
00651     void
00652     rawProject (MV& X,
00653                 Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00654                 Teuchos::ArrayView<mat_ptr> C) const;
00655 
00657     void
00658     rawProject (MV& X,
00659                 const Teuchos::RCP<const MV>& Q,
00660                 const mat_ptr& C) const;
00661 
00688     int rawNormalize (MV& X, MV& Q, mat_type& B);
00689 
00707     int normalizeOne (MV& X, mat_ptr B) const;
00708 
00736     int normalizeImpl (MV& X, MV& Q, mat_ptr B, const bool outOfPlace);
00737   };
00738 
00739   template<class Scalar, class MV>
00740   void
00741   TsqrOrthoManagerImpl<Scalar, MV>::
00742   setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
00743   {
00744     using Teuchos::ParameterList;
00745     using Teuchos::parameterList;
00746     using Teuchos::RCP;
00747     using Teuchos::sublist;
00748     typedef magnitude_type M; // abbreviation.
00749 
00750     RCP<const ParameterList> defaultParams = getValidParameters ();
00751     // Sublist of TSQR implementation parameters; to get below.
00752     RCP<ParameterList> tsqrParams;
00753 
00754     RCP<ParameterList> theParams;
00755     if (params.is_null()) {
00756       theParams = parameterList (*defaultParams);
00757     } else {
00758       theParams = params;
00759 
00760       // Don't call validateParametersAndSetDefaults(); we prefer to
00761       // ignore parameters that we don't recognize, at least for now.
00762       // However, we do fill in missing parameters with defaults.
00763 
00764       randomizeNullSpace_ =
00765         theParams->get<bool> ("randomizeNullSpace",
00766                               defaultParams->get<bool> ("randomizeNullSpace"));
00767       reorthogonalizeBlocks_ =
00768         theParams->get<bool> ("reorthogonalizeBlocks",
00769                               defaultParams->get<bool> ("reorthogonalizeBlocks"));
00770       throwOnReorthogFault_ =
00771         theParams->get<bool> ("throwOnReorthogFault",
00772                               defaultParams->get<bool> ("throwOnReorthogFault"));
00773       blockReorthogThreshold_ =
00774         theParams->get<M> ("blockReorthogThreshold",
00775                            defaultParams->get<M> ("blockReorthogThreshold"));
00776       relativeRankTolerance_ =
00777         theParams->get<M> ("relativeRankTolerance",
00778                            defaultParams->get<M> ("relativeRankTolerance"));
00779       forceNonnegativeDiagonal_ =
00780         theParams->get<bool> ("forceNonnegativeDiagonal",
00781                               defaultParams->get<bool> ("forceNonnegativeDiagonal"));
00782 
00783       // Get the sublist of TSQR implementation parameters.  Use the
00784       // default sublist if one isn't provided.
00785       if (! theParams->isSublist ("TSQR implementation")) {
00786         theParams->set ("TSQR implementation",
00787                         defaultParams->sublist ("TSQR implementation"));
00788       }
00789       tsqrParams = sublist (theParams, "TSQR implementation", true);
00790     }
00791 
00792     // Send the TSQR implementation parameters to the TSQR adaptor.
00793     tsqrAdaptor_.setParameterList (tsqrParams);
00794 
00795     // Save the input parameter list.
00796     setMyParamList (theParams);
00797   }
00798 
00799   template<class Scalar, class MV>
00800   TsqrOrthoManagerImpl<Scalar, MV>::
00801   TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00802                         const std::string& label) :
00803     label_ (label),
00804     Q_ (Teuchos::null),               // Initialized on demand
00805     eps_ (SCTM::eps()),               // Machine precision
00806     randomizeNullSpace_ (true),
00807     reorthogonalizeBlocks_ (true),
00808     throwOnReorthogFault_ (false),
00809     blockReorthogThreshold_ (0),
00810     relativeRankTolerance_ (0),
00811     forceNonnegativeDiagonal_ (false)
00812   {
00813     setParameterList (params); // This also sets tsqrAdaptor_'s parameters.
00814 
00815 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00816     timerOrtho_ = makeTimer (label, "All orthogonalization");
00817     timerProject_ = makeTimer (label, "Projection");
00818     timerNormalize_ = makeTimer (label, "Normalization");
00819 #endif // BELOS_TEUCHOS_TIME_MONITOR
00820   }
00821 
00822   template<class Scalar, class MV>
00823   TsqrOrthoManagerImpl<Scalar, MV>::
00824   TsqrOrthoManagerImpl (const std::string& label) :
00825     label_ (label),
00826     Q_ (Teuchos::null),               // Initialized on demand
00827     eps_ (SCTM::eps()),               // Machine precision
00828     randomizeNullSpace_ (true),
00829     reorthogonalizeBlocks_ (true),
00830     throwOnReorthogFault_ (false),
00831     blockReorthogThreshold_ (0),
00832     relativeRankTolerance_ (0),
00833     forceNonnegativeDiagonal_ (false)
00834   {
00835     setParameterList (Teuchos::null); // Set default parameters.
00836 
00837 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00838     timerOrtho_ = makeTimer (label, "All orthogonalization");
00839     timerProject_ = makeTimer (label, "Projection");
00840     timerNormalize_ = makeTimer (label, "Normalization");
00841 #endif // BELOS_TEUCHOS_TIME_MONITOR
00842   }
00843 
00844   template<class Scalar, class MV>
00845   void
00846   TsqrOrthoManagerImpl<Scalar, MV>::
00847   norm (const MV& X, std::vector<magnitude_type>& normVec) const
00848   {
00849     const int numCols = MVT::GetNumberVecs (X);
00850     // std::vector<T>::size_type is unsigned; int is signed.  Mixed
00851     // unsigned/signed comparisons trigger compiler warnings.
00852     if (normVec.size() < static_cast<size_t>(numCols))
00853       normVec.resize (numCols); // Resize normvec if necessary.
00854     MVT::MvNorm (X, normVec);
00855   }
00856 
00857   template<class Scalar, class MV>
00858   void
00859   TsqrOrthoManagerImpl<Scalar, MV>::project (MV& X,
00860                                              Teuchos::Array<mat_ptr> C,
00861                                              Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00862   {
00863 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00864     // "Projection" only happens in rawProject(), so we only time
00865     // projection inside rawProject().  However, we count the time
00866     // spend in project() as part of the whole orthogonalization.
00867     //
00868     // If project() is called from projectAndNormalize(), the
00869     // TimeMonitor won't start timerOrtho_, because it is already
00870     // running in projectAndNormalize().
00871     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
00872 #endif // BELOS_TEUCHOS_TIME_MONITOR
00873 
00874     int ncols_X, num_Q_blocks, ncols_Q_total;
00875     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X, Q);
00876     // Test for quick exit: any dimension of X is zero, or there are
00877     // zero Q blocks, or the total number of columns of the Q blocks
00878     // is zero.
00879     if (ncols_X == 0 || num_Q_blocks == 0 || ncols_Q_total == 0)
00880       return;
00881 
00882     // Make space for first-pass projection coefficients
00883     allocateProjectionCoefficients (C, Q, X, true);
00884 
00885     // We only use columnNormsBefore and compute pre-projection column
00886     // norms if doing block reorthogonalization.
00887     std::vector<magnitude_type> columnNormsBefore (ncols_X, magnitude_type(0));
00888     if (reorthogonalizeBlocks_)
00889       MVT::MvNorm (X, columnNormsBefore);
00890 
00891     // Project (first block orthogonalization step):
00892     // C := Q^* X, X := X - Q C.
00893     rawProject (X, Q, C);
00894 
00895     // If we are doing block reorthogonalization, reorthogonalize X if
00896     // necessary.
00897     if (reorthogonalizeBlocks_) {
00898       std::vector<magnitude_type> columnNormsAfter (ncols_X, magnitude_type(0));
00899       MVT::MvNorm (X, columnNormsAfter);
00900 
00901       // Relative block reorthogonalization threshold.
00902       const magnitude_type relThres = blockReorthogThreshold();
00903       // Reorthogonalize X if any of its column norms decreased by a
00904       // factor more than the block reorthogonalization threshold.
00905       // Don't bother trying to subset the columns; that will make the
00906       // columns noncontiguous and thus hinder BLAS 3 optimizations.
00907       bool reorthogonalize = false;
00908       for (int j = 0; j < ncols_X; ++j) {
00909         if (columnNormsAfter[j] < relThres * columnNormsBefore[j]) {
00910           reorthogonalize = true;
00911           break;
00912         }
00913       }
00914       if (reorthogonalize) {
00915         // Notify the caller via callback about the need for
00916         // reorthogonalization.
00917         if (! reorthogCallback_.is_null()) {
00918           reorthogCallback_->operator() (Teuchos::arrayViewFromVector (columnNormsBefore),
00919                                          Teuchos::arrayViewFromVector (columnNormsAfter));
00920         }
00921         // Second-pass projection coefficients
00922         Teuchos::Array<mat_ptr> C2;
00923         allocateProjectionCoefficients (C2, Q, X, false);
00924 
00925         // Perform the second projection pass:
00926         // C2 = Q' X, X = X - Q*C2
00927         rawProject (X, Q, C2);
00928         // Update the projection coefficients
00929         for (int k = 0; k < num_Q_blocks; ++k)
00930           *C[k] += *C2[k];
00931       }
00932     }
00933   }
00934 
00935 
00936   template<class Scalar, class MV>
00937   int
00938   TsqrOrthoManagerImpl<Scalar, MV>::normalize (MV& X, mat_ptr B)
00939   {
00940     using Teuchos::Range1D;
00941     using Teuchos::RCP;
00942 
00943 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00944     Teuchos::TimeMonitor timerMonitorNormalize(*timerNormalize_);
00945     // If normalize() is called internally -- i.e., called from
00946     // projectAndNormalize() -- the timer will not be started or
00947     // stopped, because it is already running.  TimeMonitor handles
00948     // recursive invocation by doing nothing.
00949     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
00950 #endif // BELOS_TEUCHOS_TIME_MONITOR
00951 
00952     // MVT returns int for this, even though the "local ordinal
00953     // type" of the MV may be some other type (for example,
00954     // Tpetra::MultiVector<double, int32_t, int64_t, ...>).
00955     const int numCols = MVT::GetNumberVecs (X);
00956 
00957     // This special case (for X having only one column) makes
00958     // TsqrOrthoManagerImpl equivalent to Modified Gram-Schmidt
00959     // orthogonalization with conditional full reorthogonalization,
00960     // if all multivector inputs have only one column.  It also
00961     // avoids allocating Q_ scratch space and copying data when we
00962     // don't need to invoke TSQR at all.
00963     if (numCols == 1) {
00964       return normalizeOne (X, B);
00965     }
00966 
00967     // We use Q_ as scratch space for the normalization, since TSQR
00968     // requires a scratch multivector (it can't factor in place).  Q_
00969     // should come from a vector space compatible with X's vector
00970     // space, and Q_ should have at least as many columns as X.
00971     // Otherwise, we have to reallocate.  We also have to allocate
00972     // (not "re-") Q_ if we haven't allocated it before.  (We can't
00973     // allocate Q_ until we have some X, so we need a multivector as
00974     // the "prototype.")
00975     //
00976     // NOTE (mfh 11 Jan 2011) We only increase the number of columsn
00977     // in Q_, never decrease.  This is OK for typical uses of TSQR,
00978     // but you might prefer different behavior in some cases.
00979     //
00980     // NOTE (mfh 10 Mar 2011) We should only reuse the scratch space
00981     // Q_ if X and Q_ have compatible data distributions.  However,
00982     // Belos' current MultiVecTraits interface does not let us check
00983     // for this.  Thus, we can only check whether X and Q_ have the
00984     // same number of rows.  This will behave correctly for the common
00985     // case in Belos that all multivectors with the same number of
00986     // rows have the same data distribution.
00987     //
00988     // The specific MV implementation may do more checks than this on
00989     // its own and throw an exception if X and Q_ are not compatible,
00990     // but it may not.  If you find that recycling the Q_ space causes
00991     // troubles, you may consider modifying the code below to
00992     // reallocate Q_ for every X that comes in.
00993     if (Q_.is_null() ||
00994         MVText::GetGlobalLength(*Q_) != MVText::GetGlobalLength(X) ||
00995         numCols > MVT::GetNumberVecs (*Q_)) {
00996       Q_ = MVT::Clone (X, numCols);
00997     }
00998 
00999     // normalizeImpl() wants the second MV argument to have the same
01000     // number of columns as X.  To ensure this, we pass it a view of
01001     // Q_ if Q_ has more columns than X.  (This is possible if we
01002     // previously called normalize() with a different multivector,
01003     // since we never reallocate Q_ if it has more columns than
01004     // necessary.)
01005     if (MVT::GetNumberVecs(*Q_) == numCols) {
01006       return normalizeImpl (X, *Q_, B, false);
01007     } else {
01008       RCP<MV> Q_view = MVT::CloneViewNonConst (*Q_, Range1D(0, numCols-1));
01009       return normalizeImpl (X, *Q_view, B, false);
01010     }
01011   }
01012 
01013   template<class Scalar, class MV>
01014   void
01015   TsqrOrthoManagerImpl<Scalar, MV>::
01016   allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C,
01017                                   Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01018                                   const MV& X,
01019                                   const bool attemptToRecycle) const
01020   {
01021     const int num_Q_blocks = Q.size();
01022     const int ncols_X = MVT::GetNumberVecs (X);
01023     C.resize (num_Q_blocks);
01024     if (attemptToRecycle)
01025       {
01026         for (int i = 0; i < num_Q_blocks; ++i)
01027           {
01028             const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
01029             // Create a new C[i] if necessary, otherwise resize if
01030             // necessary, otherwise fill with zeros.
01031             if (C[i].is_null())
01032               C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
01033             else
01034               {
01035                 mat_type& Ci = *C[i];
01036                 if (Ci.numRows() != ncols_Qi || Ci.numCols() != ncols_X)
01037                   Ci.shape (ncols_Qi, ncols_X);
01038                 else
01039                   Ci.putScalar (SCT::zero());
01040               }
01041           }
01042       }
01043     else
01044       {
01045         for (int i = 0; i < num_Q_blocks; ++i)
01046           {
01047             const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
01048             C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
01049           }
01050       }
01051   }
01052 
01053   template<class Scalar, class MV>
01054   int
01055   TsqrOrthoManagerImpl<Scalar, MV>::
01056   normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B)
01057   {
01058 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01059     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
01060     Teuchos::TimeMonitor timerMonitorNormalize(*timerNormalize_);
01061 #endif // BELOS_TEUCHOS_TIME_MONITOR
01062 
01063     const int numVecs = MVT::GetNumberVecs(X);
01064     if (numVecs == 0) {
01065       return 0; // Nothing to do.
01066     } else if (numVecs == 1) {
01067       // Special case for a single column; scale and copy over.
01068       using Teuchos::Range1D;
01069       using Teuchos::RCP;
01070       using Teuchos::rcp;
01071 
01072       // Normalize X in place (faster than TSQR for one column).
01073       const int rank = normalizeOne (X, B);
01074       // Copy results to first column of Q.
01075       RCP<MV> Q_0 = MVT::CloneViewNonConst (Q, Range1D(0,0));
01076       MVT::Assign (X, *Q_0);
01077       return rank;
01078     } else {
01079       // The "true" argument to normalizeImpl() means the output
01080       // vectors go into Q, and the contents of X are overwritten with
01081       // invalid values.
01082       return normalizeImpl (X, Q, B, true);
01083     }
01084   }
01085 
01086   template<class Scalar, class MV>
01087   int
01088   TsqrOrthoManagerImpl<Scalar, MV>::
01089   projectAndNormalizeImpl (MV& X_in,
01090                            MV& X_out, // Only written if outOfPlace==false.
01091                            const bool outOfPlace,
01092                            Teuchos::Array<mat_ptr> C,
01093                            mat_ptr B,
01094                            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
01095   {
01096     using Teuchos::Range1D;
01097     using Teuchos::RCP;
01098     using Teuchos::rcp;
01099 
01100 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01101     // Projection is only timed in rawProject(), and normalization is
01102     // only timed in normalize() and normalizeOutOfPlace().
01103     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
01104 #endif // BELOS_TEUCHOS_TIME_MONITOR
01105 
01106     if (outOfPlace) {
01107       // Make sure that X_out has at least as many columns as X_in.
01108       TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(X_out) < MVT::GetNumberVecs(X_in),
01109                          std::invalid_argument,
01110                          "Belos::TsqrOrthoManagerImpl::"
01111                          "projectAndNormalizeImpl(..., outOfPlace=true, ...):"
01112                          "X_out has " << MVT::GetNumberVecs(X_out)
01113                          << " columns, but X_in has "
01114                          << MVT::GetNumberVecs(X_in) << " columns.");
01115     }
01116     // Fetch dimensions of X_in and Q, and allocate space for first-
01117     // and second-pass projection coefficients (C resp. C2).
01118     int ncols_X, num_Q_blocks, ncols_Q_total;
01119     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X_in, Q);
01120 
01121     // Test for quick exit: if any dimension of X is zero.
01122     if (ncols_X == 0) {
01123       return 0;
01124     }
01125     // If there are zero Q blocks or zero Q columns, just normalize!
01126     if (num_Q_blocks == 0 || ncols_Q_total == 0) {
01127       if (outOfPlace) {
01128         return normalizeOutOfPlace (X_in, X_out, B);
01129       } else {
01130         return normalize (X_in, B);
01131       }
01132     }
01133 
01134     // The typical case is that the entries of C have been allocated
01135     // before, so we attempt to recycle the allocations.  The call
01136     // below will reallocate if it cannot recycle.
01137     allocateProjectionCoefficients (C, Q, X_in, true);
01138 
01139     // If we are doing block reorthogonalization, then compute the
01140     // column norms of X before projecting for the first time.  This
01141     // will help us decide whether we need to reorthogonalize X.
01142     std::vector<magnitude_type> normsBeforeFirstPass (ncols_X, SCTM::zero());
01143     if (reorthogonalizeBlocks_) {
01144       MVT::MvNorm (X_in, normsBeforeFirstPass);
01145     }
01146 
01147     // First (Modified) Block Gram-Schmidt pass, in place in X_in.
01148     rawProject (X_in, Q, C);
01149 
01150     // Make space for the normalization coefficients.  This will
01151     // either be a freshly allocated matrix (if B is null), or a view
01152     // of the appropriately sized upper left submatrix of *B (if B is
01153     // not null).
01154     //
01155     // Note that if we let the normalize() routine allocate (in the
01156     // case that B is null), that storage will go away at the end of
01157     // normalize().  (This is because it passes the RCP by value, not
01158     // by reference.)
01159     mat_ptr B_out;
01160     if (B.is_null()) {
01161       B_out = rcp (new mat_type (ncols_X, ncols_X));
01162     } else {
01163       // Make sure that B is no smaller than numCols x numCols.
01164       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < ncols_X || B->numCols() < ncols_X,
01165                          std::invalid_argument,
01166                          "normalizeOne: Input matrix B must be at "
01167                          "least " << ncols_X << " x " << ncols_X
01168                          << ", but is instead " << B->numRows()
01169                          << " x " << B->numCols() << ".");
01170       // Create a view of the ncols_X by ncols_X upper left
01171       // submatrix of *B.  TSQR will write the normalization
01172       // coefficients there.
01173       B_out = rcp (new mat_type (Teuchos::View, *B, ncols_X, ncols_X));
01174     }
01175 
01176     // Rank of X(_in) after first projection pass.  If outOfPlace,
01177     // this overwrites X_in with invalid values, and the results go in
01178     // X_out.  Otherwise, it's done in place in X_in.
01179     const int firstPassRank = outOfPlace ?
01180       normalizeOutOfPlace (X_in, X_out, B_out) :
01181       normalize (X_in, B_out);
01182     if (B.is_null()) {
01183       // The input matrix B is null, so assign B_out to it.  If B was
01184       // not null on input, then B_out is a view of *B, so we don't
01185       // have to do anything here.  Note that SerialDenseMatrix uses
01186       // raw pointers to store data and represent views, so we have to
01187       // be careful about scope.
01188       B = B_out;
01189     }
01190     int rank = firstPassRank; // Current rank of X.
01191 
01192     // If X was not full rank after projection and randomizeNullSpace_
01193     // is true, then normalize(OutOfPlace)() replaced the null space
01194     // basis of X with random vectors, and orthogonalized them against
01195     // the column space basis of X.  However, we still need to
01196     // orthogonalize the random vectors against the Q[i], after which
01197     // we will need to renormalize them.
01198     //
01199     // If outOfPlace, then we need to work in X_out (where
01200     // normalizeOutOfPlace() wrote the normalized vectors).
01201     // Otherwise, we need to work in X_in.
01202     //
01203     // Note: We don't need to keep the new projection coefficients,
01204     // since they are multiplied by the "small" part of B
01205     // corresponding to the null space of the original X.
01206     if (firstPassRank < ncols_X && randomizeNullSpace_) {
01207       const int numNullSpaceCols = ncols_X - firstPassRank;
01208       const Range1D nullSpaceIndices (firstPassRank, ncols_X - 1);
01209 
01210       // Space for projection coefficients (will be thrown away)
01211       Teuchos::Array<mat_ptr> C_null (num_Q_blocks);
01212       for (int k = 0; k < num_Q_blocks; ++k) {
01213         const int numColsQk = MVT::GetNumberVecs(*Q[k]);
01214         C_null[k] = rcp (new mat_type (numColsQk, numNullSpaceCols));
01215       }
01216       // Space for normalization coefficients (will be thrown away).
01217       RCP<mat_type> B_null (new mat_type (numNullSpaceCols, numNullSpaceCols));
01218 
01219       int randomVectorsRank;
01220       if (outOfPlace) {
01221         // View of the null space basis columns of X.
01222         // normalizeOutOfPlace() wrote them into X_out.
01223         RCP<MV> X_out_null = MVT::CloneViewNonConst (X_out, nullSpaceIndices);
01224         // Use X_in for scratch space.  Copy X_out_null into the
01225         // last few columns of X_in (X_in_null) and do projections
01226         // in there.  (This saves us a copy wen we renormalize
01227         // (out of place) back into X_out.)
01228         RCP<MV> X_in_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01229         MVT::Assign (*X_out_null, *X_in_null);
01230         // Project the new random vectors against the Q blocks, and
01231         // renormalize the result into X_out_null.
01232         rawProject (*X_in_null, Q, C_null);
01233         randomVectorsRank = normalizeOutOfPlace (*X_in_null, *X_out_null, B_null);
01234       } else {
01235         // View of the null space columns of X.
01236         // They live in X_in.
01237         RCP<MV> X_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01238         // Project the new random vectors against the Q blocks,
01239         // and renormalize the result (in place).
01240         rawProject (*X_null, Q, C_null);
01241         randomVectorsRank = normalize (*X_null, B_null);
01242       }
01243       // While unusual, it is still possible for the random data not
01244       // to be full rank after projection and normalization.  In that
01245       // case, we could try another set of random data and recurse as
01246       // necessary, but instead for now we just raise an exception.
01247       TEUCHOS_TEST_FOR_EXCEPTION(randomVectorsRank != numNullSpaceCols,
01248                          TsqrOrthoError,
01249                          "Belos::TsqrOrthoManagerImpl::projectAndNormalize"
01250                          "OutOfPlace(): After projecting and normalizing the "
01251                          "random vectors (used to replace the null space "
01252                          "basis vectors from normalizing X), they have rank "
01253                          << randomVectorsRank << ", but should have full "
01254                          "rank " << numNullSpaceCols << ".");
01255     }
01256 
01257     // Whether or not X_in was full rank after projection, we still
01258     // might want to reorthogonalize against Q.
01259     if (reorthogonalizeBlocks_) {
01260       // We are only interested in the column space basis of X
01261       // resp. X_out.
01262       std::vector<magnitude_type>
01263         normsAfterFirstPass (firstPassRank, SCTM::zero());
01264       std::vector<magnitude_type>
01265         normsAfterSecondPass (firstPassRank, SCTM::zero());
01266 
01267       // Compute post-first-pass (pre-normalization) norms.  We could
01268       // have done that using MVT::MvNorm() on X_in after projecting,
01269       // but before the first normalization.  However, that operation
01270       // may be expensive.  It is also unnecessary: after calling
01271       // normalize(), the 2-norm of B(:,j) is the 2-norm of X_in(:,j)
01272       // before normalization, in exact arithmetic.
01273       //
01274       // NOTE (mfh 06 Nov 2010) This is one way that combining
01275       // projection and normalization into a single kernel --
01276       // projectAndNormalize() -- pays off.  In project(), we have to
01277       // compute column norms of X before and after projection.  Here,
01278       // we get them for free from the normalization coefficients.
01279       Teuchos::BLAS<int, Scalar> blas;
01280       for (int j = 0; j < firstPassRank; ++j) {
01281         const Scalar* const B_j = &(*B_out)(0,j);
01282         // Teuchos::BLAS::NRM2 returns a magnitude_type result on
01283         // Scalar inputs.
01284         normsAfterFirstPass[j] = blas.NRM2 (firstPassRank, B_j, 1);
01285       }
01286       // Test whether any of the norms dropped below the
01287       // reorthogonalization threshold.
01288       bool reorthogonalize = false;
01289       for (int j = 0; j < firstPassRank; ++j) {
01290         // If any column's norm decreased too much, mark this block
01291         // for reorthogonalization.  Note that this test will _not_
01292         // activate reorthogonalization if a column's norm before the
01293         // first project-and-normalize step was zero.  It _will_
01294         // activate reorthogonalization if the column's norm before
01295         // was not zero, but is zero now.
01296         const magnitude_type curThreshold =
01297           blockReorthogThreshold() * normsBeforeFirstPass[j];
01298         if (normsAfterFirstPass[j] < curThreshold) {
01299           reorthogonalize = true;
01300           break;
01301         }
01302       }
01303 
01304       // Notify the caller via callback about the need for
01305       // reorthogonalization.
01306       if (! reorthogCallback_.is_null()) {
01307         using Teuchos::arrayViewFromVector;
01308         (*reorthogCallback_) (arrayViewFromVector (normsBeforeFirstPass),
01309                               arrayViewFromVector (normsAfterFirstPass));
01310       }
01311 
01312       // Perform another Block Gram-Schmidt pass if necessary.  "Twice
01313       // is enough" (Kahan's theorem) for a Krylov method, unless
01314       // (using Stewart's term) there is an "orthogonalization fault"
01315       // (indicated by reorthogFault).
01316       //
01317       // NOTE (mfh 07 Nov 2010) For now, we include the entire block
01318       // of X, including possible random data (that was already
01319       // projected and normalized above).  It might make more sense
01320       // just to process the first firstPassRank columns of X.
01321       // However, the resulting reorthogonalization should still be
01322       // correct regardless.
01323       bool reorthogFault = false;
01324       // Indices of X at which there was an orthogonalization fault.
01325       std::vector<int> faultIndices;
01326       if (reorthogonalize) {
01327         using Teuchos::Copy;
01328         using Teuchos::NO_TRANS;
01329 
01330         // If we're using out-of-place normalization, copy X_out
01331         // (results of first project and normalize pass) back into
01332         // X_in, for the second project and normalize pass.
01333         if (outOfPlace) {
01334           MVT::Assign (X_out, X_in);
01335         }
01336 
01337         // C2 is only used internally, so we know that we are
01338         // allocating fresh and not recycling allocations.  Stating
01339         // this lets us save time checking dimensions.
01340         Teuchos::Array<mat_ptr> C2;
01341         allocateProjectionCoefficients (C2, Q, X_in, false);
01342 
01343         // Block Gram-Schmidt (again).  Delay updating the block
01344         // coefficients until we have the new normalization
01345         // coefficients, which we need in order to do the update.
01346         rawProject (X_in, Q, C2);
01347 
01348         // Coefficients for (re)normalization of X_in.
01349         RCP<mat_type> B2 (new mat_type (ncols_X, ncols_X));
01350 
01351         // Normalize X_in (into X_out, if working out of place).
01352         const int secondPassRank = outOfPlace ?
01353           normalizeOutOfPlace (X_in, X_out, B2) :
01354           normalize (X_in, B2);
01355         rank = secondPassRank; // Current rank of X
01356 
01357         // Update normalization coefficients.  We begin with copying
01358         // B_out, since the BLAS' _GEMM routine doesn't let us alias
01359         // its input and output arguments.
01360         mat_type B_copy (Copy, *B_out, B_out->numRows(), B_out->numCols());
01361         // B_out := B2 * B_out (where input B_out is in B_copy).
01362         const int err = B_out->multiply (NO_TRANS, NO_TRANS, SCT::one(),
01363                                          *B2, B_copy, SCT::zero());
01364         TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::logic_error,
01365                            "Teuchos::SerialDenseMatrix::multiply "
01366                            "returned err = " << err << " != 0");
01367         // Update the block coefficients from the projection step.  We
01368         // use B_copy for this (a copy of B_out, the first-pass
01369         // normalization coefficients).
01370         for (int k = 0; k < num_Q_blocks; ++k) {
01371           mat_type C_k_copy (Copy, *C[k], C[k]->numRows(), C[k]->numCols());
01372 
01373           // C[k] := C2[k]*B_copy + C[k].
01374           const int err1 = C[k]->multiply (NO_TRANS, NO_TRANS, SCT::one(),
01375                                           *C2[k], B_copy, SCT::one());
01376           TEUCHOS_TEST_FOR_EXCEPTION(err1 != 0, std::logic_error,
01377                              "Teuchos::SerialDenseMatrix::multiply "
01378                              "returned err = " << err1 << " != 0");
01379         }
01380         // Compute post-second-pass (pre-normalization) norms, using
01381         // B2 (the coefficients from the second normalization step) in
01382         // the same way as with B_out before.
01383         for (int j = 0; j < rank; ++j) {
01384           const Scalar* const B2_j = &(*B2)(0,j);
01385           normsAfterSecondPass[j] = blas.NRM2 (rank, B2_j, 1);
01386         }
01387         // Test whether any of the norms dropped below the
01388         // reorthogonalization threshold.  If so, it's an
01389         // orthogonalization fault, which requires expensive recovery.
01390         reorthogFault = false;
01391         for (int j = 0; j < rank; ++j) {
01392           const magnitude_type relativeLowerBound =
01393             blockReorthogThreshold() * normsAfterFirstPass[j];
01394           if (normsAfterSecondPass[j] < relativeLowerBound) {
01395             reorthogFault = true;
01396             faultIndices.push_back (j);
01397           }
01398         }
01399       } // if (reorthogonalize) // reorthogonalization pass
01400 
01401       if (reorthogFault) {
01402         if (throwOnReorthogFault_) {
01403           raiseReorthogFault (normsAfterFirstPass,
01404                               normsAfterSecondPass,
01405                               faultIndices);
01406         } else {
01407           // NOTE (mfh 19 Jan 2011) We could handle the fault here by
01408           // slowly reorthogonalizing, one vector at a time, the
01409           // offending vectors of X.  However, we choose not to
01410           // implement this for now.  If it becomes a problem, let us
01411           // know and we will prioritize implementing this.
01412           TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
01413                              "TsqrOrthoManagerImpl has not yet implemented"
01414                              " recovery from an orthogonalization fault.");
01415         }
01416       }
01417     } // if (reorthogonalizeBlocks_)
01418     return rank;
01419   }
01420 
01421 
01422   template<class Scalar, class MV>
01423   void
01424   TsqrOrthoManagerImpl<Scalar, MV>::
01425   raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
01426                       const std::vector<magnitude_type>& normsAfterSecondPass,
01427                       const std::vector<int>& faultIndices)
01428   {
01429     using std::endl;
01430     typedef std::vector<int>::size_type size_type;
01431     std::ostringstream os;
01432 
01433     os << "Orthogonalization fault at the following column(s) of X:" << endl;
01434     os << "Column\tNorm decrease factor" << endl;
01435     for (size_type k = 0; k < faultIndices.size(); ++k) {
01436       const int index = faultIndices[k];
01437       const magnitude_type decreaseFactor =
01438         normsAfterSecondPass[index] / normsAfterFirstPass[index];
01439       os << index << "\t" << decreaseFactor << endl;
01440     }
01441     throw TsqrOrthoFault (os.str());
01442   }
01443 
01444   template<class Scalar, class MV>
01445   Teuchos::RCP<const Teuchos::ParameterList>
01446   TsqrOrthoManagerImpl<Scalar, MV>::getValidParameters () const
01447   {
01448     using Teuchos::ParameterList;
01449     using Teuchos::parameterList;
01450     using Teuchos::RCP;
01451 
01452     if (defaultParams_.is_null()) {
01453       RCP<ParameterList> params = parameterList ("TsqrOrthoManagerImpl");
01454       //
01455       // TSQR parameters (set as a sublist).
01456       //
01457       params->set ("TSQR implementation", *(tsqrAdaptor_.getValidParameters()),
01458                    "TSQR implementation parameters.");
01459       //
01460       // Orthogonalization parameters
01461       //
01462       const bool defaultRandomizeNullSpace = true;
01463       params->set ("randomizeNullSpace", defaultRandomizeNullSpace,
01464                    "Whether to fill in null space vectors with random data.");
01465 
01466       const bool defaultReorthogonalizeBlocks = true;
01467       params->set ("reorthogonalizeBlocks", defaultReorthogonalizeBlocks,
01468                    "Whether to do block reorthogonalization as necessary.");
01469 
01470       // This parameter corresponds to the "blk_tol_" parameter in
01471       // Belos' DGKSOrthoManager.  We choose the same default value.
01472       const magnitude_type defaultBlockReorthogThreshold =
01473         magnitude_type(10) * SCTM::squareroot (SCTM::eps());
01474       params->set ("blockReorthogThreshold", defaultBlockReorthogThreshold,
01475                    "If reorthogonalizeBlocks==true, and if the norm of "
01476                    "any column within a block decreases by this much or "
01477                    "more after orthogonalization, we reorthogonalize.");
01478 
01479       // This parameter corresponds to the "sing_tol_" parameter in
01480       // Belos' DGKSOrthoManager.  We choose the same default value.
01481       const magnitude_type defaultRelativeRankTolerance =
01482         Teuchos::as<magnitude_type>(10) * SCTM::eps();
01483 
01484       // If the relative rank tolerance is zero, then we will always
01485       // declare blocks to be numerically full rank, as long as no
01486       // singular values are zero.
01487       params->set ("relativeRankTolerance", defaultRelativeRankTolerance,
01488                    "Relative tolerance to determine the numerical rank of a "
01489                    "block when normalizing.");
01490 
01491       // See Stewart's 2008 paper on block Gram-Schmidt for a definition
01492       // of "orthogonalization fault."
01493       const bool defaultThrowOnReorthogFault = true;
01494       params->set ("throwOnReorthogFault", defaultThrowOnReorthogFault,
01495                    "Whether to throw an exception if an orthogonalization "
01496                    "fault occurs.  This only matters if reorthogonalization "
01497                    "is enabled (reorthogonalizeBlocks==true).");
01498 
01499       const bool defaultForceNonnegativeDiagonal = false;
01500       params->set ("forceNonnegativeDiagonal", defaultForceNonnegativeDiagonal,
01501                    "Whether to force the R factor produced by the normalization "
01502                    "step to have a nonnegative diagonal.");
01503 
01504       defaultParams_ = params;
01505     }
01506     return defaultParams_;
01507   }
01508 
01509   template<class Scalar, class MV>
01510   Teuchos::RCP<const Teuchos::ParameterList>
01511   TsqrOrthoManagerImpl<Scalar, MV>::getFastParameters ()
01512   {
01513     using Teuchos::ParameterList;
01514     using Teuchos::RCP;
01515     using Teuchos::rcp;
01516 
01517     RCP<const ParameterList> defaultParams = getValidParameters();
01518     // Start with a clone of the default parameters.
01519     RCP<ParameterList> params = rcp (new ParameterList (*defaultParams));
01520 
01521     // Disable reorthogonalization and randomization of the null
01522     // space basis.  Reorthogonalization tolerances don't matter,
01523     // since we aren't reorthogonalizing blocks in the fast
01524     // settings.  We can leave the default values.  Also,
01525     // (re)orthogonalization faults may only occur with
01526     // reorthogonalization, so we don't have to worry about the
01527     // "throwOnReorthogFault" setting.
01528     const bool randomizeNullSpace = false;
01529     params->set ("randomizeNullSpace", randomizeNullSpace);
01530     const bool reorthogonalizeBlocks = false;
01531     params->set ("reorthogonalizeBlocks", reorthogonalizeBlocks);
01532 
01533     return params;
01534   }
01535 
01536   template<class Scalar, class MV>
01537   int
01538   TsqrOrthoManagerImpl<Scalar, MV>::
01539   rawNormalize (MV& X,
01540                 MV& Q,
01541                 Teuchos::SerialDenseMatrix<int, Scalar>& B)
01542   {
01543     int rank;
01544     try {
01545       // This call only computes the QR factorization X = Q B.
01546       // It doesn't compute the rank of X.  That comes from
01547       // revealRank() below.
01548       tsqrAdaptor_.factorExplicit (X, Q, B, forceNonnegativeDiagonal_);
01549       // This call will only modify *B if *B on input is not of full
01550       // numerical rank.
01551       rank = tsqrAdaptor_.revealRank (Q, B, relativeRankTolerance_);
01552     } catch (std::exception& e) {
01553       throw TsqrOrthoError (e.what()); // Toss the exception up the chain.
01554     }
01555     return rank;
01556   }
01557 
01558   template<class Scalar, class MV>
01559   int
01560   TsqrOrthoManagerImpl<Scalar, MV>::
01561   normalizeOne (MV& X,
01562                 Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B) const
01563   {
01564     // Make space for the normalization coefficient.  This will either
01565     // be a freshly allocated matrix (if B is null), or a view of the
01566     // 1x1 upper left submatrix of *B (if B is not null).
01567     mat_ptr B_out;
01568     if (B.is_null()) {
01569       B_out = rcp (new mat_type (1, 1));
01570     } else {
01571       const int theNumRows = B->numRows ();
01572       const int theNumCols = B->numCols ();
01573       TEUCHOS_TEST_FOR_EXCEPTION(
01574         theNumRows < 1 || theNumCols < 1, std::invalid_argument,
01575         "normalizeOne: Input matrix B must be at least 1 x 1, but "
01576         "is instead " << theNumRows << " x " << theNumCols << ".");
01577       // Create a view of the 1x1 upper left submatrix of *B.
01578       B_out = rcp (new mat_type (Teuchos::View, *B, 1, 1));
01579     }
01580 
01581     // Compute the norm of X, and write the result to B_out.
01582     std::vector<magnitude_type> theNorm (1, SCTM::zero());
01583     MVT::MvNorm (X, theNorm);
01584     (*B_out)(0,0) = theNorm[0];
01585 
01586     if (B.is_null()) {
01587       // The input matrix B is null, so assign B_out to it.  If B was
01588       // not null on input, then B_out is a view of *B, so we don't
01589       // have to do anything here.  Note that SerialDenseMatrix uses
01590       // raw pointers to store data and represent views, so we have to
01591       // be careful about scope.
01592       B = B_out;
01593     }
01594 
01595     // Scale X by its norm, if its norm is zero.  Otherwise, do the
01596     // right thing based on whether the user wants us to fill the null
01597     // space with random vectors.
01598     if (theNorm[0] == SCTM::zero()) {
01599       // Make a view of the first column of Q, fill it with random
01600       // data, and normalize it.  Throw away the resulting norm.
01601       if (randomizeNullSpace_) {
01602         MVT::MvRandom(X);
01603         MVT::MvNorm (X, theNorm);
01604         if (theNorm[0] == SCTM::zero()) {
01605           // It is possible that a random vector could have all zero
01606           // entries, but unlikely.  We could try again, but it's also
01607           // possible that multiple tries could result in zero
01608           // vectors.  We choose instead to give up.
01609           throw TsqrOrthoError("normalizeOne: a supposedly random "
01610                                "vector has norm zero!");
01611         } else {
01612           // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a
01613           // Scalar by a magnitude_type is defined and that it results
01614           // in a Scalar.
01615           const Scalar alpha = SCT::one() / theNorm[0];
01616           MVT::MvScale (X, alpha);
01617         }
01618       }
01619       return 0; // The rank of the matrix (actually just one vector) X.
01620     } else {
01621       // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a Scalar by
01622       // a magnitude_type is defined and that it results in a Scalar.
01623       const Scalar alpha = SCT::one() / theNorm[0];
01624       MVT::MvScale (X, alpha);
01625       return 1; // The rank of the matrix (actually just one vector) X.
01626     }
01627   }
01628 
01629 
01630   template<class Scalar, class MV>
01631   void
01632   TsqrOrthoManagerImpl<Scalar, MV>::
01633   rawProject (MV& X,
01634               Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01635               Teuchos::ArrayView<Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > > C) const
01636   {
01637 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01638     Teuchos::TimeMonitor timerMonitorNormalize(*timerProject_);
01639 #endif // BELOS_TEUCHOS_TIME_MONITOR
01640 
01641     // "Modified Gram-Schmidt" version of Block Gram-Schmidt.
01642     const int num_Q_blocks = Q.size();
01643     for (int i = 0; i < num_Q_blocks; ++i)
01644       {
01645         // TEUCHOS_TEST_FOR_EXCEPTION(C[i].is_null(), std::logic_error,
01646         //                 "TsqrOrthoManagerImpl::rawProject(): C["
01647         //                 << i << "] is null");
01648         // TEUCHOS_TEST_FOR_EXCEPTION(Q[i].is_null(), std::logic_error,
01649         //                 "TsqrOrthoManagerImpl::rawProject(): Q["
01650         //                 << i << "] is null");
01651         mat_type& Ci = *C[i];
01652         const MV& Qi = *Q[i];
01653         innerProd (Qi, X, Ci);
01654         MVT::MvTimesMatAddMv (-SCT::one(), Qi, Ci, SCT::one(), X);
01655       }
01656   }
01657 
01658 
01659   template<class Scalar, class MV>
01660   void
01661   TsqrOrthoManagerImpl<Scalar, MV>::
01662   rawProject (MV& X,
01663               const Teuchos::RCP<const MV>& Q,
01664               const Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> >& C) const
01665   {
01666 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01667     Teuchos::TimeMonitor timerMonitorNormalize(*timerProject_);
01668 #endif // BELOS_TEUCHOS_TIME_MONITOR
01669 
01670     // Block Gram-Schmidt
01671     innerProd (*Q, X, *C);
01672     MVT::MvTimesMatAddMv (-SCT::one(), *Q, *C, SCT::one(), X);
01673   }
01674 
01675   template<class Scalar, class MV>
01676   int
01677   TsqrOrthoManagerImpl<Scalar, MV>::
01678   normalizeImpl (MV& X,
01679                  MV& Q,
01680                  Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B,
01681                  const bool outOfPlace)
01682   {
01683     using Teuchos::Range1D;
01684     using Teuchos::RCP;
01685     using Teuchos::rcp;
01686     using Teuchos::ScalarTraits;
01687     using Teuchos::tuple;
01688 
01689     const int numCols = MVT::GetNumberVecs (X);
01690     if (numCols == 0) {
01691       return 0; // Fast exit for an empty input matrix.
01692     }
01693 
01694     // We allow Q to have more columns than X.  In that case, we only
01695     // touch the first numCols columns of Q.
01696     TEUCHOS_TEST_FOR_EXCEPTION(
01697       MVT::GetNumberVecs (Q) < numCols, std::invalid_argument,
01698       "TsqrOrthoManagerImpl::normalizeImpl: Q has "
01699       << MVT::GetNumberVecs(Q) << " columns.  This is too "
01700       "few, since X has " << numCols << " columns.");
01701     // TSQR wants a Q with the same number of columns as X, so have it
01702     // work on a nonconstant view of Q with the same number of columns
01703     // as X.
01704     RCP<MV> Q_view = MVT::CloneViewNonConst (Q, Range1D (0, numCols-1));
01705 
01706     // Make space for the normalization coefficients.  This will
01707     // either be a freshly allocated matrix (if B is null), or a view
01708     // of the appropriately sized upper left submatrix of *B (if B is
01709     // not null).
01710     mat_ptr B_out;
01711     if (B.is_null ()) {
01712       B_out = rcp (new mat_type (numCols, numCols));
01713     } else {
01714       // Make sure that B is no smaller than numCols x numCols.
01715       TEUCHOS_TEST_FOR_EXCEPTION(
01716         B->numRows() < numCols || B->numCols() < numCols, std::invalid_argument,
01717         "TsqrOrthoManagerImpl::normalizeImpl: Input matrix B must be at least "
01718         << numCols << " x " << numCols << ", but is instead " << B->numRows ()
01719         << " x " << B->numCols() << ".");
01720       // Create a view of the numCols x numCols upper left submatrix
01721       // of *B.  TSQR will write the normalization coefficients there.
01722       B_out = rcp (new mat_type (Teuchos::View, *B, numCols, numCols));
01723     }
01724 
01725     // Compute rank-revealing decomposition (in this case, TSQR of X
01726     // followed by SVD of the R factor and appropriate updating of the
01727     // resulting Q factor) of X.  X is modified in place and filled
01728     // with garbage, and Q_view contains the resulting explicit Q
01729     // factor.  Later, we will copy this back into X.
01730     //
01731     // The matrix *B_out will only be upper triangular if X is of full
01732     // numerical rank.  Otherwise, the entries below the diagonal may
01733     // be filled in as well.
01734     const int rank = rawNormalize (X, *Q_view, *B_out);
01735     if (B.is_null ()) {
01736       // The input matrix B is null, so assign B_out to it.  If B was
01737       // not null on input, then B_out is a view of *B, so we don't
01738       // have to do anything here.  Note that SerialDenseMatrix uses
01739       // raw pointers to store data and represent views, so we have to
01740       // be careful about scope.
01741       B = B_out;
01742     }
01743 
01744     TEUCHOS_TEST_FOR_EXCEPTION(
01745       rank < 0 || rank > numCols, std::logic_error,
01746       "Belos::TsqrOrthoManagerImpl::normalizeImpl: rawNormalize returned rank "
01747       " = " << rank << " for a matrix X with " << numCols << " columns.  "
01748       "Please report this bug to the Belos developers.");
01749 
01750     // If X is full rank or we don't want to replace its null space
01751     // basis with random vectors, then we're done.
01752     if (rank == numCols || ! randomizeNullSpace_) {
01753       // If we're supposed to be working in place in X, copy the
01754       // results back from Q_view into X.
01755       if (! outOfPlace) {
01756         MVT::Assign (*Q_view, X);
01757       }
01758       return rank;
01759     }
01760 
01761     if (randomizeNullSpace_ && rank < numCols) {
01762       // X wasn't full rank.  Replace the null space basis of X (in
01763       // the last numCols-rank columns of Q_view) with random data,
01764       // project it against the first rank columns of Q_view, and
01765       // normalize.
01766       //
01767       // Number of columns to fill with random data.
01768       const int nullSpaceNumCols = numCols - rank;
01769       // Inclusive range of indices of columns of X to fill with
01770       // random data.
01771       Range1D nullSpaceIndices (rank, numCols-1);
01772 
01773       // rawNormalize wrote the null space basis vectors into Q_view.
01774       // We continue to work in place in Q_view by writing the random
01775       // data there and (if there is a nontrival column space)
01776       // projecting in place against the column space basis vectors
01777       // (also in Q_view).
01778       RCP<MV> Q_null = MVT::CloneViewNonConst (*Q_view, nullSpaceIndices);
01779       // Replace the null space basis with random data.
01780       MVT::MvRandom (*Q_null);
01781 
01782       // Make sure that the "random" data isn't all zeros.  This is
01783       // statistically nearly impossible, but we test for debugging
01784       // purposes.
01785       {
01786         std::vector<magnitude_type> norms (MVT::GetNumberVecs (*Q_null));
01787         MVT::MvNorm (*Q_null, norms);
01788 
01789         bool anyZero = false;
01790         typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01791         for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01792           if (*it == SCTM::zero()) {
01793             anyZero = true;
01794           }
01795         }
01796         if (anyZero) {
01797           std::ostringstream os;
01798           os << "TsqrOrthoManagerImpl::normalizeImpl: "
01799             "We are being asked to randomize the null space, for a matrix "
01800             "with " << numCols << " columns and reported column rank "
01801              << rank << ".  The inclusive range of columns to fill with "
01802             "random data is [" << nullSpaceIndices.lbound() << ","
01803              << nullSpaceIndices.ubound() << "].  After filling the null "
01804             "space vectors with random numbers, at least one of the vectors"
01805             " has norm zero.  Here are the norms of all the null space "
01806             "vectors: [";
01807           for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01808             os << *it;
01809             if (it+1 != norms.end())
01810               os << ", ";
01811           }
01812           os << "].)  There is a tiny probability that this could happen "
01813             "randomly, but it is likely a bug.  Please report it to the "
01814             "Belos developers, especially if you are able to reproduce the "
01815             "behavior.";
01816           TEUCHOS_TEST_FOR_EXCEPTION(anyZero, TsqrOrthoError, os.str());
01817         }
01818       }
01819 
01820       if (rank > 0) {
01821         // Project the random data against the column space basis of
01822         // X, using a simple block projection ("Block Classical
01823         // Gram-Schmidt").  This is accurate because we've already
01824         // orthogonalized the column space basis of X nearly to
01825         // machine precision via a QR factorization (TSQR) with
01826         // accuracy comparable to Householder QR.
01827         RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D (0, rank-1));
01828 
01829         // Temporary storage for projection coefficients.  We don't
01830         // need to keep them, since they represent the null space
01831         // basis (for which the coefficients are logically zero).
01832         mat_ptr C_null (new mat_type (rank, nullSpaceNumCols));
01833         rawProject (*Q_null, Q_col, C_null);
01834       }
01835       // Normalize the projected random vectors, so that they are
01836       // mutually orthogonal (as well as orthogonal to the column
01837       // space basis of X).  We use X for the output of the
01838       // normalization: for out-of-place normalization (outOfPlace ==
01839       // true), X is overwritten with "invalid values" anyway, and for
01840       // in-place normalization (outOfPlace == false), we want the
01841       // result to be in X anyway.
01842       RCP<MV> X_null = MVT::CloneViewNonConst (X, nullSpaceIndices);
01843       // Normalization coefficients for projected random vectors.
01844       // Will be thrown away.
01845       mat_type B_null (nullSpaceNumCols, nullSpaceNumCols);
01846       // Write the normalized vectors to X_null (in X).
01847       const int nullSpaceBasisRank = rawNormalize (*Q_null, *X_null, B_null);
01848 
01849       // It's possible, but unlikely, that X_null doesn't have full
01850       // rank (after the projection step).  We could recursively fill
01851       // in more random vectors until we finally get a full rank
01852       // matrix, but instead we just throw an exception.
01853       //
01854       // NOTE (mfh 08 Nov 2010) Perhaps we should deal with this case
01855       // more elegantly.  Recursion might be one way to solve it, but
01856       // be sure to check that the recursion will terminate.  We could
01857       // do this by supplying an additional argument to rawNormalize,
01858       // which is the null space basis rank from the previous
01859       // iteration.  The rank has to decrease each time, or the
01860       // recursion may go on forever.
01861       if (nullSpaceBasisRank < nullSpaceNumCols) {
01862         std::vector<magnitude_type> norms (MVT::GetNumberVecs(*X_null));
01863         MVT::MvNorm (*X_null, norms);
01864         std::ostringstream os;
01865         os << "TsqrOrthoManagerImpl::normalizeImpl: "
01866            << "We are being asked to randomize the null space, "
01867            << "for a matrix with " << numCols << " columns and "
01868            << "column rank " << rank << ".  After projecting and "
01869            << "normalizing the generated random vectors, they "
01870            << "only have rank " << nullSpaceBasisRank << ".  They"
01871            << " should have full rank " << nullSpaceNumCols
01872            << ".  (The inclusive range of columns to fill with "
01873            << "random data is [" << nullSpaceIndices.lbound()
01874            << "," << nullSpaceIndices.ubound() << "].  The "
01875            << "column norms of the resulting Q factor are: [";
01876         for (typename std::vector<magnitude_type>::size_type k = 0;
01877              k < norms.size(); ++k) {
01878           os << norms[k];
01879           if (k != norms.size()-1) {
01880             os << ", ";
01881           }
01882         }
01883         os << "].)  There is a tiny probability that this could "
01884            << "happen randomly, but it is likely a bug.  Please "
01885            << "report it to the Belos developers, especially if "
01886            << "you are able to reproduce the behavior.";
01887 
01888         TEUCHOS_TEST_FOR_EXCEPTION(nullSpaceBasisRank < nullSpaceNumCols,
01889          TsqrOrthoError, os.str ());
01890       }
01891       // If we're normalizing out of place, copy the X_null
01892       // vectors back into Q_null; the Q_col vectors are already
01893       // where they are supposed to be in that case.
01894       //
01895       // If we're normalizing in place, leave X_null alone (it's
01896       // already where it needs to be, in X), but copy Q_col back
01897       // into the first rank columns of X.
01898       if (outOfPlace) {
01899         MVT::Assign (*X_null, *Q_null);
01900       } else if (rank > 0) {
01901         // MVT::Assign() doesn't accept empty ranges of columns.
01902         RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D (0, rank-1));
01903         RCP<MV> X_col = MVT::CloneViewNonConst (X, Range1D (0, rank-1));
01904         MVT::Assign (*Q_col, *X_col);
01905       }
01906     }
01907     return rank;
01908   }
01909 
01910 
01911   template<class Scalar, class MV>
01912   void
01913   TsqrOrthoManagerImpl<Scalar, MV>::
01914   checkProjectionDims (int& ncols_X,
01915                        int& num_Q_blocks,
01916                        int& ncols_Q_total,
01917                        const MV& X,
01918                        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const
01919   {
01920     // First assign to temporary values, so the function won't
01921     // commit any externally visible side effects unless it will
01922     // return normally (without throwing an exception).  (I'm being
01923     // cautious; MVT::GetNumberVecs() probably shouldn't have any
01924     // externally visible side effects, unless it is logging to a
01925     // file or something.)
01926     int the_ncols_X, the_num_Q_blocks, the_ncols_Q_total;
01927     the_num_Q_blocks = Q.size();
01928     the_ncols_X = MVT::GetNumberVecs (X);
01929 
01930     // Compute the total number of columns of all the Q[i] blocks.
01931     the_ncols_Q_total = 0;
01932     // You should be angry if your compiler doesn't support type
01933     // inference ("auto").  That's why I need this awful typedef.
01934     using Teuchos::ArrayView;
01935     using Teuchos::RCP;
01936     typedef typename ArrayView<RCP<const MV> >::const_iterator iter_type;
01937     for (iter_type it = Q.begin(); it != Q.end(); ++it) {
01938       const MV& Qi = **it;
01939       the_ncols_Q_total += MVT::GetNumberVecs (Qi);
01940     }
01941 
01942     // Commit temporary values to the output arguments.
01943     ncols_X = the_ncols_X;
01944     num_Q_blocks = the_num_Q_blocks;
01945     ncols_Q_total = the_ncols_Q_total;
01946   }
01947 
01948 } // namespace Belos
01949 
01950 #endif // __BelosTsqrOrthoManagerImpl_hpp
01951 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines