Belos Package Browser (Single Doxygen Collection) Development
BelosTsqrOrthoManagerImpl.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                  Copyright 2004 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00045 #ifndef __BelosTsqrOrthoManagerImpl_hpp
00046 #define __BelosTsqrOrthoManagerImpl_hpp
00047 
00048 #include "BelosConfigDefs.hpp" // HAVE_BELOS_TSQR
00049 #include "BelosMultiVecTraits.hpp"
00050 #include "BelosOrthoManager.hpp" // OrthoError, etc.
00051 
00052 #include "Teuchos_as.hpp"
00053 #include "Teuchos_LAPACK.hpp"
00054 #include "Teuchos_ParameterList.hpp"
00055 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
00056 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00057 #  include "Teuchos_TimeMonitor.hpp"
00058 #endif // BELOS_TEUCHOS_TIME_MONITOR
00059 #include <algorithm>
00060 #include <functional> // std::binary_function
00061 
00062 namespace Belos {
00063 
00067   class TsqrOrthoError : public OrthoError {
00068   public: 
00069     TsqrOrthoError (const std::string& what_arg) : 
00070       OrthoError (what_arg) {}
00071   };
00072 
00092   class TsqrOrthoFault : public OrthoError {
00093   public: 
00094     TsqrOrthoFault (const std::string& what_arg) : 
00095       OrthoError (what_arg) {}
00096   };
00097 
00132   template<class Scalar>
00133   class ReorthogonalizationCallback : 
00134     public std::binary_function<Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>, 
00135         Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>,
00136         void>
00137   {
00138   public:
00139     typedef Scalar scalar_type;
00140     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00141 
00146     virtual void
00147     operator() (Teuchos::ArrayView<magnitude_type> normsBeforeFirstPass,
00148     Teuchos::ArrayView<magnitude_type> normsAfterFirstPass) = 0;
00149   };
00150 
00151 
00184   template<class Scalar, class MV>
00185   class TsqrOrthoManagerImpl : 
00186     public Teuchos::ParameterListAcceptorDefaultBase {
00187   public:
00188     typedef Scalar scalar_type;
00189     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00190     typedef MV multivector_type;
00193     typedef Teuchos::SerialDenseMatrix<int, Scalar> mat_type;
00194     typedef Teuchos::RCP<mat_type> mat_ptr;
00195 
00196   private:
00197     typedef Teuchos::ScalarTraits<Scalar> SCT;
00198     typedef Teuchos::ScalarTraits<magnitude_type> SCTM;
00199     typedef MultiVecTraits<Scalar, MV> MVT;
00200     typedef typename MVT::tsqr_adaptor_type tsqr_adaptor_type;
00201 
00202   public:
00210     Teuchos::RCP<const Teuchos::ParameterList> getValidParameters () const;
00211 
00213     void setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params);
00214 
00225     Teuchos::RCP<const Teuchos::ParameterList> getFastParameters ();
00226 
00243     TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00244         const std::string& label);
00245 
00250     TsqrOrthoManagerImpl (const std::string& label);
00251     
00273     void 
00274     setReorthogonalizationCallback (const Teuchos::RCP<ReorthogonalizationCallback<Scalar> >& callback)
00275     {
00276       reorthogCallback_ = callback;
00277     }
00278 
00286     void setLabel (const std::string& label) { 
00287       if (label != label_) {
00288   label_ = label; 
00289 
00290 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00291   clearTimer (label, "All orthogonalization");
00292   clearTimer (label, "Projection");
00293   clearTimer (label, "Normalization");
00294 
00295   timerOrtho_ = makeTimer (label, "All orthogonalization");
00296   timerProject_ = makeTimer (label, "Projection");
00297   timerNormalize_ = makeTimer (label, "Normalization");
00298 #endif // BELOS_TEUCHOS_TIME_MONITOR  
00299       }
00300     }
00301 
00303     const std::string& getLabel () const { return label_; }
00304 
00313     void 
00314     innerProd (const MV& X, const MV& Y, mat_type& Z) const
00315     {
00316       MVT::MvTransMv (SCT::one(), X, Y, Z);
00317     }
00318 
00336     void
00337     norm (const MV& X, std::vector<magnitude_type>& normVec) const;
00338 
00348     void 
00349     project (MV& X, 
00350        Teuchos::Array<mat_ptr> C, 
00351        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00352 
00366     int normalize (MV& X, mat_ptr B);
00367 
00386     int 
00387     normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B);
00388 
00402     int 
00403     projectAndNormalize (MV &X,
00404        Teuchos::Array<mat_ptr> C,
00405        mat_ptr B,
00406        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00407     {
00408       // "false" means we work on X in place.  The second argument is
00409       // not read or written in that case.
00410       return projectAndNormalizeImpl (X, X, false, C, B, Q);
00411     }
00412 
00432     int 
00433     projectAndNormalizeOutOfPlace (MV& X_in, 
00434            MV& X_out,
00435            Teuchos::Array<mat_ptr> C,
00436            mat_ptr B,
00437            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00438     {
00439       // "true" means we work on X_in out of place, writing the
00440       // results into X_out.
00441       return projectAndNormalizeImpl (X_in, X_out, true, C, B, Q);
00442     }
00443     
00448     magnitude_type 
00449     orthonormError (const MV &X) const
00450     {
00451       const Scalar ONE = SCT::one();
00452       const int ncols = MVT::GetNumberVecs(X);
00453       mat_type XTX (ncols, ncols);
00454       innerProd (X, X, XTX);
00455       for (int k = 0; k < ncols; ++k) {
00456   XTX(k,k) -= ONE;
00457       }
00458       return XTX.normFrobenius();
00459     }
00460 
00462     magnitude_type 
00463     orthogError (const MV &X1, 
00464      const MV &X2) const
00465     {
00466       const int ncols_X1 = MVT::GetNumberVecs (X1);
00467       const int ncols_X2 = MVT::GetNumberVecs (X2);
00468       mat_type X1_T_X2 (ncols_X1, ncols_X2);
00469       innerProd (X1, X2, X1_T_X2);
00470       return X1_T_X2.normFrobenius();
00471     }
00472 
00476     magnitude_type blockReorthogThreshold() const { return blockReorthogThreshold_; }
00477 
00480     magnitude_type relativeRankTolerance() const { return relativeRankTolerance_; }
00481 
00482   private:
00484     Teuchos::RCP<Teuchos::ParameterList> params_;
00485 
00487     mutable Teuchos::RCP<const Teuchos::ParameterList> defaultParams_;
00488 
00490     std::string label_;
00491 
00493     tsqr_adaptor_type tsqrAdaptor_;
00494 
00504     Teuchos::RCP<MV> Q_;
00505 
00507     magnitude_type eps_;
00508 
00512     bool randomizeNullSpace_;
00513 
00519     bool reorthogonalizeBlocks_;
00520 
00524     bool throwOnReorthogFault_;
00525 
00527     magnitude_type blockReorthogThreshold_;
00528 
00530     magnitude_type relativeRankTolerance_;
00531 
00538     bool forceNonnegativeDiagonal_;
00539 
00540 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00541 
00542     Teuchos::RCP<Teuchos::Time> timerOrtho_;
00543 
00545     Teuchos::RCP<Teuchos::Time> timerProject_;
00546 
00548     Teuchos::RCP<Teuchos::Time> timerNormalize_;
00549 
00551     Teuchos::RCP<ReorthogonalizationCallback<Scalar> > reorthogCallback_;
00552 
00561     static Teuchos::RCP<Teuchos::Time>
00562     makeTimer (const std::string& prefix, 
00563          const std::string& timerName)
00564     {
00565       const std::string timerLabel = 
00566   prefix.empty() ? timerName : (prefix + ": " + timerName);
00567       return Teuchos::TimeMonitor::getNewCounter (timerLabel);
00568     }
00569 
00575     void
00576     clearTimer (const std::string& prefix, 
00577     const std::string& timerName)
00578     {
00579       const std::string timerLabel = 
00580   prefix.empty() ? timerName : (prefix + ": " + timerName);
00581       Teuchos::TimeMonitor::clearCounter (timerLabel);
00582     }
00583 #endif // BELOS_TEUCHOS_TIME_MONITOR
00584 
00586     void
00587     raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
00588       const std::vector<magnitude_type>& normsAfterSecondPass,
00589       const std::vector<int>& faultIndices);
00590 
00600     void
00601     checkProjectionDims (int& ncols_X, 
00602        int& num_Q_blocks,
00603        int& ncols_Q_total,
00604        const MV& X, 
00605        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const;
00606 
00617     void
00618     allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
00619             Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00620             const MV& X,
00621             const bool attemptToRecycle = true) const;
00622 
00631     int 
00632     projectAndNormalizeImpl (MV& X_in, 
00633            MV& X_out,
00634            const bool outOfPlace,
00635            Teuchos::Array<mat_ptr> C,
00636            mat_ptr B,
00637            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00638 
00644     void
00645     rawProject (MV& X, 
00646     Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00647     Teuchos::ArrayView<mat_ptr> C) const;
00648 
00650     void
00651     rawProject (MV& X, 
00652     const Teuchos::RCP<const MV>& Q, 
00653     const mat_ptr& C) const;
00654 
00681     int rawNormalize (MV& X, MV& Q, mat_type& B);
00682 
00700     int normalizeOne (MV& X, mat_ptr B) const;
00701 
00729     int normalizeImpl (MV& X, MV& Q, mat_ptr B, const bool outOfPlace);
00730   };
00731 
00732   template<class Scalar, class MV>
00733   void
00734   TsqrOrthoManagerImpl<Scalar, MV>::
00735   setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
00736   {
00737     using Teuchos::ParameterList;
00738     using Teuchos::parameterList;
00739     using Teuchos::RCP;
00740     using Teuchos::sublist;
00741     typedef magnitude_type M; // abbreviation.
00742 
00743     RCP<const ParameterList> defaultParams = getValidParameters ();
00744     // Sublist of TSQR implementation parameters; to get below.
00745     RCP<ParameterList> tsqrParams;
00746 
00747     RCP<ParameterList> theParams;
00748     if (params.is_null()) {
00749       theParams = parameterList (*defaultParams);
00750     } else {
00751       theParams = params;
00752 
00753       // Don't call validateParametersAndSetDefaults(); we prefer to
00754       // ignore parameters that we don't recognize, at least for now.
00755       // However, we do fill in missing parameters with defaults.
00756 
00757       randomizeNullSpace_ = 
00758   theParams->get<bool> ("randomizeNullSpace", 
00759             defaultParams->get<bool> ("randomizeNullSpace"));
00760       reorthogonalizeBlocks_ = 
00761   theParams->get<bool> ("reorthogonalizeBlocks", 
00762             defaultParams->get<bool> ("reorthogonalizeBlocks"));
00763       throwOnReorthogFault_ = 
00764   theParams->get<bool> ("throwOnReorthogFault", 
00765             defaultParams->get<bool> ("throwOnReorthogFault"));
00766       blockReorthogThreshold_ = 
00767   theParams->get<M> ("blockReorthogThreshold",
00768          defaultParams->get<M> ("blockReorthogThreshold"));
00769       relativeRankTolerance_ = 
00770   theParams->get<M> ("relativeRankTolerance", 
00771          defaultParams->get<M> ("relativeRankTolerance"));
00772       forceNonnegativeDiagonal_ = 
00773   theParams->get<bool> ("forceNonnegativeDiagonal", 
00774             defaultParams->get<bool> ("forceNonnegativeDiagonal"));
00775 
00776       // Get the sublist of TSQR implementation parameters.  Use the
00777       // default sublist if one isn't provided.
00778       if (! theParams->isSublist ("TSQR implementation")) {
00779   theParams->set ("TSQR implementation", 
00780       defaultParams->sublist ("TSQR implementation"));
00781       }
00782       tsqrParams = sublist (theParams, "TSQR implementation", true);
00783     }
00784 
00785     // Send the TSQR implementation parameters to the TSQR adaptor.
00786     tsqrAdaptor_.setParameterList (tsqrParams);
00787 
00788     // Save the input parameter list.
00789     setMyParamList (theParams);
00790   }
00791  
00792   template<class Scalar, class MV>
00793   TsqrOrthoManagerImpl<Scalar, MV>::
00794   TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00795       const std::string& label) :
00796     label_ (label),
00797     Q_ (Teuchos::null),               // Initialized on demand
00798     eps_ (SCTM::eps()),               // Machine precision
00799     randomizeNullSpace_ (true),
00800     reorthogonalizeBlocks_ (true),
00801     throwOnReorthogFault_ (false),
00802     blockReorthogThreshold_ (0),
00803     relativeRankTolerance_ (0),
00804     forceNonnegativeDiagonal_ (false)
00805   {
00806     setParameterList (params); // This also sets tsqrAdaptor_'s parameters.
00807 
00808 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00809     timerOrtho_ = makeTimer (label, "All orthogonalization");
00810     timerProject_ = makeTimer (label, "Projection");
00811     timerNormalize_ = makeTimer (label, "Normalization");
00812 #endif // BELOS_TEUCHOS_TIME_MONITOR
00813   }
00814 
00815   template<class Scalar, class MV>
00816   TsqrOrthoManagerImpl<Scalar, MV>::
00817   TsqrOrthoManagerImpl (const std::string& label) :
00818     label_ (label),
00819     Q_ (Teuchos::null),               // Initialized on demand
00820     eps_ (SCTM::eps()),               // Machine precision
00821     randomizeNullSpace_ (true),
00822     reorthogonalizeBlocks_ (true),
00823     throwOnReorthogFault_ (false),
00824     blockReorthogThreshold_ (0),
00825     relativeRankTolerance_ (0), 
00826     forceNonnegativeDiagonal_ (false) 
00827   {
00828     setParameterList (Teuchos::null); // Set default parameters.
00829 
00830 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00831     timerOrtho_ = makeTimer (label, "All orthogonalization");
00832     timerProject_ = makeTimer (label, "Projection");
00833     timerNormalize_ = makeTimer (label, "Normalization");
00834 #endif // BELOS_TEUCHOS_TIME_MONITOR
00835   }
00836 
00837   template<class Scalar, class MV>
00838   void
00839   TsqrOrthoManagerImpl<Scalar, MV>::
00840   norm (const MV& X, std::vector<magnitude_type>& normVec) const
00841   {
00842     const int numCols = MVT::GetNumberVecs (X);
00843     // std::vector<T>::size_type is unsigned; int is signed.  Mixed
00844     // unsigned/signed comparisons trigger compiler warnings.
00845     if (normVec.size() < static_cast<size_t>(numCols))
00846       normVec.resize (numCols); // Resize normvec if necessary.
00847     MVT::MvNorm (X, normVec);
00848   }
00849 
00850   template<class Scalar, class MV>
00851   void
00852   TsqrOrthoManagerImpl<Scalar, MV>::project (MV& X, 
00853                Teuchos::Array<mat_ptr> C,
00854                Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00855   {
00856 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00857     // "Projection" only happens in rawProject(), so we only time
00858     // projection inside rawProject().  However, we count the time
00859     // spend in project() as part of the whole orthogonalization.
00860     //
00861     // If project() is called from projectAndNormalize(), the
00862     // TimeMonitor won't start timerOrtho_, because it is already
00863     // running in projectAndNormalize().
00864     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
00865 #endif // BELOS_TEUCHOS_TIME_MONITOR
00866 
00867     int ncols_X, num_Q_blocks, ncols_Q_total;
00868     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X, Q);
00869     // Test for quick exit: any dimension of X is zero, or there are
00870     // zero Q blocks, or the total number of columns of the Q blocks
00871     // is zero.
00872     if (ncols_X == 0 || num_Q_blocks == 0 || ncols_Q_total == 0)
00873       return;
00874 
00875     // Make space for first-pass projection coefficients
00876     allocateProjectionCoefficients (C, Q, X, true);
00877 
00878     // We only use columnNormsBefore and compute pre-projection column
00879     // norms if doing block reorthogonalization.
00880     std::vector<magnitude_type> columnNormsBefore (ncols_X, magnitude_type(0));
00881     if (reorthogonalizeBlocks_)
00882       MVT::MvNorm (X, columnNormsBefore);
00883 
00884     // Project (first block orthogonalization step): 
00885     // C := Q^* X, X := X - Q C.
00886     rawProject (X, Q, C); 
00887 
00888     // If we are doing block reorthogonalization, reorthogonalize X if
00889     // necessary.
00890     if (reorthogonalizeBlocks_) {
00891       std::vector<magnitude_type> columnNormsAfter (ncols_X, magnitude_type(0));
00892       MVT::MvNorm (X, columnNormsAfter);
00893   
00894       // Relative block reorthogonalization threshold.
00895       const magnitude_type relThres = blockReorthogThreshold();
00896       // Reorthogonalize X if any of its column norms decreased by a
00897       // factor more than the block reorthogonalization threshold.
00898       // Don't bother trying to subset the columns; that will make the
00899       // columns noncontiguous and thus hinder BLAS 3 optimizations.
00900       bool reorthogonalize = false;
00901       for (int j = 0; j < ncols_X; ++j) {
00902   if (columnNormsAfter[j] < relThres * columnNormsBefore[j]) {
00903     reorthogonalize = true;
00904     break;
00905   }
00906       }
00907       if (reorthogonalize) {
00908   // Notify the caller via callback about the need for
00909   // reorthogonalization.
00910   if (! reorthogCallback_.is_null()) {
00911     reorthogCallback_->operator() (Teuchos::arrayViewFromVector (columnNormsBefore), 
00912            Teuchos::arrayViewFromVector (columnNormsAfter));
00913   }
00914   // Second-pass projection coefficients
00915   Teuchos::Array<mat_ptr> C2;
00916   allocateProjectionCoefficients (C2, Q, X, false);
00917 
00918   // Perform the second projection pass:
00919   // C2 = Q' X, X = X - Q*C2
00920   rawProject (X, Q, C2);
00921   // Update the projection coefficients
00922   for (int k = 0; k < num_Q_blocks; ++k)
00923     *C[k] += *C2[k];
00924       }
00925     }
00926   }
00927 
00928 
00929   template<class Scalar, class MV>
00930   int 
00931   TsqrOrthoManagerImpl<Scalar, MV>::normalize (MV& X, mat_ptr B)
00932   {
00933     using Teuchos::Range1D;
00934     using Teuchos::RCP;
00935 
00936 #ifdef BELOS_TEUCHOS_TIME_MONITOR
00937     Teuchos::TimeMonitor timerMonitorNormalize(*timerNormalize_);
00938     // If normalize() is called internally -- i.e., called from
00939     // projectAndNormalize() -- the timer will not be started or 
00940     // stopped, because it is already running.  TimeMonitor handles
00941     // recursive invocation by doing nothing.
00942     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
00943 #endif // BELOS_TEUCHOS_TIME_MONITOR
00944 
00945     // MVT returns int for this, even though the "local ordinal
00946     // type" of the MV may be some other type (for example,
00947     // Tpetra::MultiVector<double, int32_t, int64_t, ...>).
00948     const int numCols = MVT::GetNumberVecs (X);
00949 
00950     // This special case (for X having only one column) makes
00951     // TsqrOrthoManagerImpl equivalent to Modified Gram-Schmidt
00952     // orthogonalization with conditional full reorthogonalization,
00953     // if all multivector inputs have only one column.  It also
00954     // avoids allocating Q_ scratch space and copying data when we
00955     // don't need to invoke TSQR at all.
00956     if (numCols == 1) {
00957       return normalizeOne (X, B);
00958     }
00959 
00960     // We use Q_ as scratch space for the normalization, since TSQR
00961     // requires a scratch multivector (it can't factor in place).  Q_
00962     // should come from a vector space compatible with X's vector
00963     // space, and Q_ should have at least as many columns as X.
00964     // Otherwise, we have to reallocate.  We also have to allocate
00965     // (not "re-") Q_ if we haven't allocated it before.  (We can't
00966     // allocate Q_ until we have some X, so we need a multivector as
00967     // the "prototype.")
00968     //
00969     // NOTE (mfh 11 Jan 2011) We only increase the number of columsn
00970     // in Q_, never decrease.  This is OK for typical uses of TSQR,
00971     // but you might prefer different behavior in some cases.
00972     //
00973     // NOTE (mfh 10 Mar 2011) We should only reuse the scratch space
00974     // Q_ if X and Q_ have compatible data distributions.  However,
00975     // Belos' current MultiVecTraits interface does not let us check
00976     // for this.  Thus, we can only check whether X and Q_ have the
00977     // same number of rows.  This will behave correctly for the common
00978     // case in Belos that all multivectors with the same number of
00979     // rows have the same data distribution.
00980     //
00981     // The specific MV implementation may do more checks than this on
00982     // its own and throw an exception if X and Q_ are not compatible,
00983     // but it may not.  If you find that recycling the Q_ space causes
00984     // troubles, you may consider modifying the code below to
00985     // reallocate Q_ for every X that comes in.  
00986     if (Q_.is_null() || 
00987   MVT::GetVecLength(*Q_) != MVT::GetVecLength(X) ||
00988   numCols > MVT::GetNumberVecs (*Q_)) {
00989       Q_ = MVT::Clone (X, numCols);
00990     }
00991 
00992     // normalizeImpl() wants the second MV argument to have the same
00993     // number of columns as X.  To ensure this, we pass it a view of
00994     // Q_ if Q_ has more columns than X.  (This is possible if we
00995     // previously called normalize() with a different multivector,
00996     // since we never reallocate Q_ if it has more columns than
00997     // necessary.)
00998     if (MVT::GetNumberVecs(*Q_) == numCols) {
00999       return normalizeImpl (X, *Q_, B, false);
01000     } else {
01001       RCP<MV> Q_view = MVT::CloneViewNonConst (*Q_, Range1D(0, numCols-1));
01002       return normalizeImpl (X, *Q_view, B, false);
01003     }
01004   }
01005 
01006   template<class Scalar, class MV>
01007   void
01008   TsqrOrthoManagerImpl<Scalar, MV>::
01009   allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
01010           Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01011           const MV& X,
01012           const bool attemptToRecycle) const
01013   {
01014     const int num_Q_blocks = Q.size();
01015     const int ncols_X = MVT::GetNumberVecs (X);
01016     C.resize (num_Q_blocks);
01017     if (attemptToRecycle)
01018       {
01019   for (int i = 0; i < num_Q_blocks; ++i) 
01020     {
01021       const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
01022       // Create a new C[i] if necessary, otherwise resize if
01023       // necessary, otherwise fill with zeros.
01024       if (C[i].is_null())
01025         C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
01026       else 
01027         {
01028     mat_type& Ci = *C[i];
01029     if (Ci.numRows() != ncols_Qi || Ci.numCols() != ncols_X)
01030       Ci.shape (ncols_Qi, ncols_X);
01031     else
01032       Ci.putScalar (SCT::zero());
01033         }
01034     }
01035       }
01036     else
01037       {
01038   for (int i = 0; i < num_Q_blocks; ++i) 
01039     {
01040       const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
01041       C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
01042     }
01043       }
01044   }
01045 
01046   template<class Scalar, class MV>
01047   int 
01048   TsqrOrthoManagerImpl<Scalar, MV>::
01049   normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B)
01050   {
01051 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01052     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
01053     Teuchos::TimeMonitor timerMonitorNormalize(*timerNormalize_);
01054 #endif // BELOS_TEUCHOS_TIME_MONITOR
01055 
01056     const int numVecs = MVT::GetNumberVecs(X);
01057     if (numVecs == 0) {
01058       return 0; // Nothing to do.
01059     } else if (numVecs == 1) {
01060       // Special case for a single column; scale and copy over.
01061       using Teuchos::Range1D;
01062       using Teuchos::RCP;
01063       using Teuchos::rcp;
01064 
01065       // Normalize X in place (faster than TSQR for one column).
01066       const int rank = normalizeOne (X, B);
01067       // Copy results to first column of Q.
01068       RCP<MV> Q_0 = MVT::CloneViewNonConst (Q, Range1D(0,0));
01069       MVT::Assign (X, *Q_0);
01070       return rank;
01071     } else {
01072       // The "true" argument to normalizeImpl() means the output
01073       // vectors go into Q, and the contents of X are overwritten with
01074       // invalid values.
01075       return normalizeImpl (X, Q, B, true);
01076     }
01077   }
01078 
01079   template<class Scalar, class MV>
01080   int 
01081   TsqrOrthoManagerImpl<Scalar, MV>::
01082   projectAndNormalizeImpl (MV& X_in, 
01083          MV& X_out, // Only written if outOfPlace==false.
01084          const bool outOfPlace,
01085          Teuchos::Array<mat_ptr> C,
01086          mat_ptr B,
01087          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
01088   {
01089     using Teuchos::Range1D;
01090     using Teuchos::RCP;
01091     using Teuchos::rcp;
01092 
01093 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01094     // Projection is only timed in rawProject(), and normalization is
01095     // only timed in normalize() and normalizeOutOfPlace().
01096     Teuchos::TimeMonitor timerMonitorOrtho(*timerOrtho_);
01097 #endif // BELOS_TEUCHOS_TIME_MONITOR
01098 
01099     if (outOfPlace) {
01100       // Make sure that X_out has at least as many columns as X_in.
01101       TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(X_out) < MVT::GetNumberVecs(X_in),
01102        std::invalid_argument, 
01103        "Belos::TsqrOrthoManagerImpl::"
01104        "projectAndNormalizeImpl(..., outOfPlace=true, ...):"
01105        "X_out has " << MVT::GetNumberVecs(X_out) 
01106        << " columns, but X_in has "
01107        << MVT::GetNumberVecs(X_in) << " columns.");
01108     }
01109     // Fetch dimensions of X_in and Q, and allocate space for first-
01110     // and second-pass projection coefficients (C resp. C2).
01111     int ncols_X, num_Q_blocks, ncols_Q_total;
01112     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X_in, Q);
01113 
01114     // Test for quick exit: if any dimension of X is zero.
01115     if (ncols_X == 0) {
01116       return 0;
01117     }
01118     // If there are zero Q blocks or zero Q columns, just normalize!
01119     if (num_Q_blocks == 0 || ncols_Q_total == 0) {
01120       if (outOfPlace) {
01121   return normalizeOutOfPlace (X_in, X_out, B);
01122       } else {
01123   return normalize (X_in, B);
01124       }
01125     }
01126 
01127     // The typical case is that the entries of C have been allocated
01128     // before, so we attempt to recycle the allocations.  The call
01129     // below will reallocate if it cannot recycle.
01130     allocateProjectionCoefficients (C, Q, X_in, true);
01131 
01132     // If we are doing block reorthogonalization, then compute the
01133     // column norms of X before projecting for the first time.  This
01134     // will help us decide whether we need to reorthogonalize X.
01135     std::vector<magnitude_type> normsBeforeFirstPass (ncols_X, SCTM::zero());
01136     if (reorthogonalizeBlocks_) {
01137       MVT::MvNorm (X_in, normsBeforeFirstPass);
01138     }
01139 
01140     // First (Modified) Block Gram-Schmidt pass, in place in X_in.
01141     rawProject (X_in, Q, C);
01142 
01143     // Make space for the normalization coefficients.  This will
01144     // either be a freshly allocated matrix (if B is null), or a view
01145     // of the appropriately sized upper left submatrix of *B (if B is
01146     // not null).
01147     //
01148     // Note that if we let the normalize() routine allocate (in the
01149     // case that B is null), that storage will go away at the end of
01150     // normalize().  (This is because it passes the RCP by value, not
01151     // by reference.)
01152     mat_ptr B_out;
01153     if (B.is_null()) {
01154       B_out = rcp (new mat_type (ncols_X, ncols_X));
01155     } else {
01156       // Make sure that B is no smaller than numCols x numCols.
01157       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < ncols_X || B->numCols() < ncols_X,
01158        std::invalid_argument,
01159        "normalizeOne: Input matrix B must be at "
01160        "least " << ncols_X << " x " << ncols_X 
01161        << ", but is instead " << B->numRows()
01162        << " x " << B->numCols() << ".");
01163       // Create a view of the ncols_X by ncols_X upper left
01164       // submatrix of *B.  TSQR will write the normalization
01165       // coefficients there.
01166       B_out = rcp (new mat_type (Teuchos::View, *B, ncols_X, ncols_X));
01167     }
01168 
01169     // Rank of X(_in) after first projection pass.  If outOfPlace,
01170     // this overwrites X_in with invalid values, and the results go in
01171     // X_out.  Otherwise, it's done in place in X_in.
01172     const int firstPassRank = outOfPlace ? 
01173       normalizeOutOfPlace (X_in, X_out, B_out) : 
01174       normalize (X_in, B_out);
01175     if (B.is_null()) {
01176       // The input matrix B is null, so assign B_out to it.  If B was
01177       // not null on input, then B_out is a view of *B, so we don't
01178       // have to do anything here.  Note that SerialDenseMatrix uses
01179       // raw pointers to store data and represent views, so we have to
01180       // be careful about scope.
01181       B = B_out;
01182     }
01183     int rank = firstPassRank; // Current rank of X.
01184 
01185     // If X was not full rank after projection and randomizeNullSpace_
01186     // is true, then normalize(OutOfPlace)() replaced the null space
01187     // basis of X with random vectors, and orthogonalized them against
01188     // the column space basis of X.  However, we still need to
01189     // orthogonalize the random vectors against the Q[i], after which
01190     // we will need to renormalize them.
01191     //
01192     // If outOfPlace, then we need to work in X_out (where
01193     // normalizeOutOfPlace() wrote the normalized vectors).
01194     // Otherwise, we need to work in X_in.
01195     //
01196     // Note: We don't need to keep the new projection coefficients,
01197     // since they are multiplied by the "small" part of B
01198     // corresponding to the null space of the original X.
01199     if (firstPassRank < ncols_X && randomizeNullSpace_) {
01200       const int numNullSpaceCols = ncols_X - firstPassRank;
01201       const Range1D nullSpaceIndices (firstPassRank, ncols_X - 1);
01202 
01203       // Space for projection coefficients (will be thrown away)
01204       Teuchos::Array<mat_ptr> C_null (num_Q_blocks);
01205       for (int k = 0; k < num_Q_blocks; ++k) {
01206   const int numColsQk = MVT::GetNumberVecs(*Q[k]);
01207   C_null[k] = rcp (new mat_type (numColsQk, numNullSpaceCols));
01208       }
01209       // Space for normalization coefficients (will be thrown away).
01210       RCP<mat_type> B_null (new mat_type (numNullSpaceCols, numNullSpaceCols));
01211 
01212       int randomVectorsRank;
01213       if (outOfPlace) {
01214   // View of the null space basis columns of X.
01215   // normalizeOutOfPlace() wrote them into X_out.
01216   RCP<MV> X_out_null = MVT::CloneViewNonConst (X_out, nullSpaceIndices);
01217   // Use X_in for scratch space.  Copy X_out_null into the
01218   // last few columns of X_in (X_in_null) and do projections
01219   // in there.  (This saves us a copy wen we renormalize
01220   // (out of place) back into X_out.)
01221   RCP<MV> X_in_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01222   MVT::Assign (*X_out_null, *X_in_null);
01223   // Project the new random vectors against the Q blocks, and
01224   // renormalize the result into X_out_null.  
01225   rawProject (*X_in_null, Q, C_null);
01226   randomVectorsRank = normalizeOutOfPlace (*X_in_null, *X_out_null, B_null);
01227       } else {
01228   // View of the null space columns of X.  
01229   // They live in X_in.
01230   RCP<MV> X_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01231   // Project the new random vectors against the Q blocks,
01232   // and renormalize the result (in place).
01233   rawProject (*X_null, Q, C_null);
01234   randomVectorsRank = normalize (*X_null, B_null);
01235       }
01236       // While unusual, it is still possible for the random data not
01237       // to be full rank after projection and normalization.  In that
01238       // case, we could try another set of random data and recurse as
01239       // necessary, but instead for now we just raise an exception.
01240       TEUCHOS_TEST_FOR_EXCEPTION(randomVectorsRank != numNullSpaceCols, 
01241        TsqrOrthoError, 
01242        "Belos::TsqrOrthoManagerImpl::projectAndNormalize"
01243        "OutOfPlace(): After projecting and normalizing the "
01244        "random vectors (used to replace the null space "
01245        "basis vectors from normalizing X), they have rank "
01246        << randomVectorsRank << ", but should have full "
01247        "rank " << numNullSpaceCols << ".");
01248     }
01249 
01250     // Whether or not X_in was full rank after projection, we still
01251     // might want to reorthogonalize against Q.
01252     if (reorthogonalizeBlocks_) {
01253       // We are only interested in the column space basis of X
01254       // resp. X_out.
01255       std::vector<magnitude_type> 
01256   normsAfterFirstPass (firstPassRank, SCTM::zero());
01257       std::vector<magnitude_type> 
01258   normsAfterSecondPass (firstPassRank, SCTM::zero());
01259 
01260       // Compute post-first-pass (pre-normalization) norms.  We could
01261       // have done that using MVT::MvNorm() on X_in after projecting,
01262       // but before the first normalization.  However, that operation
01263       // may be expensive.  It is also unnecessary: after calling
01264       // normalize(), the 2-norm of B(:,j) is the 2-norm of X_in(:,j)
01265       // before normalization, in exact arithmetic.
01266       //
01267       // NOTE (mfh 06 Nov 2010) This is one way that combining
01268       // projection and normalization into a single kernel --
01269       // projectAndNormalize() -- pays off.  In project(), we have to
01270       // compute column norms of X before and after projection.  Here,
01271       // we get them for free from the normalization coefficients.
01272       Teuchos::BLAS<int, Scalar> blas;
01273       for (int j = 0; j < firstPassRank; ++j) {
01274   const Scalar* const B_j = &(*B_out)(0,j);
01275   // Teuchos::BLAS::NRM2 returns a magnitude_type result on
01276   // Scalar inputs.
01277   normsAfterFirstPass[j] = blas.NRM2 (firstPassRank, B_j, 1);
01278       }
01279       // Test whether any of the norms dropped below the
01280       // reorthogonalization threshold.
01281       bool reorthogonalize = false;
01282       for (int j = 0; j < firstPassRank; ++j) {
01283   // If any column's norm decreased too much, mark this block
01284   // for reorthogonalization.  Note that this test will _not_
01285   // activate reorthogonalization if a column's norm before the
01286   // first project-and-normalize step was zero.  It _will_
01287   // activate reorthogonalization if the column's norm before
01288   // was not zero, but is zero now.
01289   const magnitude_type curThreshold = 
01290     blockReorthogThreshold() * normsBeforeFirstPass[j];
01291   if (normsAfterFirstPass[j] < curThreshold) {
01292     reorthogonalize = true; 
01293     break;
01294   }
01295       }
01296 
01297       // Notify the caller via callback about the need for
01298       // reorthogonalization.
01299       if (! reorthogCallback_.is_null()) {
01300   using Teuchos::arrayViewFromVector;
01301   (*reorthogCallback_) (arrayViewFromVector (normsBeforeFirstPass),
01302             arrayViewFromVector (normsAfterFirstPass));
01303       }
01304 
01305       // Perform another Block Gram-Schmidt pass if necessary.  "Twice
01306       // is enough" (Kahan's theorem) for a Krylov method, unless
01307       // (using Stewart's term) there is an "orthogonalization fault"
01308       // (indicated by reorthogFault).
01309       //
01310       // NOTE (mfh 07 Nov 2010) For now, we include the entire block
01311       // of X, including possible random data (that was already
01312       // projected and normalized above).  It might make more sense
01313       // just to process the first firstPassRank columns of X.
01314       // However, the resulting reorthogonalization should still be
01315       // correct regardless.
01316       bool reorthogFault = false;
01317       // Indices of X at which there was an orthogonalization fault.
01318       std::vector<int> faultIndices;
01319       if (reorthogonalize) {
01320   using Teuchos::Copy;
01321   using Teuchos::NO_TRANS;
01322 
01323   // If we're using out-of-place normalization, copy X_out
01324   // (results of first project and normalize pass) back into
01325   // X_in, for the second project and normalize pass.
01326   if (outOfPlace) {
01327     MVT::Assign (X_out, X_in);
01328   }
01329 
01330   // C2 is only used internally, so we know that we are
01331   // allocating fresh and not recycling allocations.  Stating
01332   // this lets us save time checking dimensions.
01333   Teuchos::Array<mat_ptr> C2;
01334   allocateProjectionCoefficients (C2, Q, X_in, false);
01335 
01336   // Block Gram-Schmidt (again).  Delay updating the block
01337   // coefficients until we have the new normalization
01338   // coefficients, which we need in order to do the update.
01339   rawProject (X_in, Q, C2);
01340       
01341   // Coefficients for (re)normalization of X_in.
01342   RCP<mat_type> B2 (new mat_type (ncols_X, ncols_X));
01343 
01344   // Normalize X_in (into X_out, if working out of place).
01345   const int secondPassRank = outOfPlace ? 
01346     normalizeOutOfPlace (X_in, X_out, B2) : 
01347     normalize (X_in, B2);
01348   rank = secondPassRank; // Current rank of X
01349 
01350   // Update normalization coefficients.  We begin with copying
01351   // B_out, since the BLAS' _GEMM routine doesn't let us alias
01352   // its input and output arguments.
01353   mat_type B_copy (Copy, *B_out, B_out->numRows(), B_out->numCols());
01354   // B_out := B2 * B_out (where input B_out is in B_copy).
01355   const int err = B_out->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01356            *B2, B_copy, SCT::zero());
01357   TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::logic_error, 
01358          "Teuchos::SerialDenseMatrix::multiply "
01359          "returned err = " << err << " != 0");
01360   // Update the block coefficients from the projection step.  We
01361   // use B_copy for this (a copy of B_out, the first-pass
01362   // normalization coefficients).
01363   for (int k = 0; k < num_Q_blocks; ++k) {
01364     mat_type C_k_copy (Copy, *C[k], C[k]->numRows(), C[k]->numCols());
01365 
01366     // C[k] := C2[k]*B_copy + C[k].
01367     const int err = C[k]->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01368             *C2[k], B_copy, SCT::one());
01369     TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::logic_error, 
01370            "Teuchos::SerialDenseMatrix::multiply "
01371            "returned err = " << err << " != 0");
01372   }
01373   // Compute post-second-pass (pre-normalization) norms, using
01374   // B2 (the coefficients from the second normalization step) in
01375   // the same way as with B_out before.
01376   for (int j = 0; j < rank; ++j) {
01377     const Scalar* const B2_j = &(*B2)(0,j);
01378     normsAfterSecondPass[j] = blas.NRM2 (rank, B2_j, 1);
01379   }
01380   // Test whether any of the norms dropped below the
01381   // reorthogonalization threshold.  If so, it's an
01382   // orthogonalization fault, which requires expensive recovery.
01383   reorthogFault = false;
01384   for (int j = 0; j < rank; ++j) {
01385     const magnitude_type relativeLowerBound = 
01386       blockReorthogThreshold() * normsAfterFirstPass[j];
01387     if (normsAfterSecondPass[j] < relativeLowerBound) {
01388       reorthogFault = true; 
01389       faultIndices.push_back (j);
01390     }
01391   }
01392       } // if (reorthogonalize) // reorthogonalization pass
01393 
01394       if (reorthogFault) {
01395   if (throwOnReorthogFault_) {
01396     raiseReorthogFault (normsAfterFirstPass, 
01397             normsAfterSecondPass, 
01398             faultIndices);
01399   } else {
01400     // NOTE (mfh 19 Jan 2011) We could handle the fault here by
01401     // slowly reorthogonalizing, one vector at a time, the
01402     // offending vectors of X.  However, we choose not to
01403     // implement this for now.  If it becomes a problem, let us
01404     // know and we will prioritize implementing this.
01405     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 
01406            "TsqrOrthoManagerImpl has not yet implemented"
01407            " recovery from an orthogonalization fault.");
01408   }
01409       }
01410     } // if (reorthogonalizeBlocks_)
01411     return rank;
01412   }
01413 
01414 
01415   template<class Scalar, class MV>
01416   void
01417   TsqrOrthoManagerImpl<Scalar, MV>::
01418   raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
01419           const std::vector<magnitude_type>& normsAfterSecondPass,
01420           const std::vector<int>& faultIndices)
01421   {
01422     using std::endl;
01423     typedef std::vector<int>::size_type size_type;
01424     std::ostringstream os;
01425       
01426     os << "Orthogonalization fault at the following column(s) of X:" << endl;
01427     os << "Column\tNorm decrease factor" << endl;
01428     for (size_type k = 0; k < faultIndices.size(); ++k) {
01429       const int index = faultIndices[k];
01430       const magnitude_type decreaseFactor = 
01431   normsAfterSecondPass[index] / normsAfterFirstPass[index];
01432       os << index << "\t" << decreaseFactor << endl;
01433     }
01434     throw TsqrOrthoFault (os.str());
01435   }
01436 
01437   template<class Scalar, class MV>
01438   Teuchos::RCP<const Teuchos::ParameterList>
01439   TsqrOrthoManagerImpl<Scalar, MV>::getValidParameters () const
01440   {
01441     using Teuchos::ParameterList;
01442     using Teuchos::parameterList;
01443     using Teuchos::RCP;
01444     typedef Teuchos::ScalarTraits<magnitude_type> SCTM;
01445 
01446     if (defaultParams_.is_null()) {
01447       RCP<ParameterList> params = parameterList ("TsqrOrthoManagerImpl");
01448       //
01449       // TSQR parameters (set as a sublist).
01450       //
01451       params->set ("TSQR implementation", *(tsqrAdaptor_.getValidParameters()),
01452        "TSQR implementation parameters.");
01453       // 
01454       // Orthogonalization parameters
01455       //
01456       const bool defaultRandomizeNullSpace = true;
01457       params->set ("randomizeNullSpace", defaultRandomizeNullSpace, 
01458        "Whether to fill in null space vectors with random data.");
01459 
01460       const bool defaultReorthogonalizeBlocks = true;
01461       params->set ("reorthogonalizeBlocks", defaultReorthogonalizeBlocks,
01462        "Whether to do block reorthogonalization as necessary.");
01463 
01464       // This parameter corresponds to the "blk_tol_" parameter in
01465       // Belos' DGKSOrthoManager.  We choose the same default value.
01466       const magnitude_type defaultBlockReorthogThreshold = 
01467   magnitude_type(10) * SCTM::squareroot (SCTM::eps());
01468       params->set ("blockReorthogThreshold", defaultBlockReorthogThreshold, 
01469        "If reorthogonalizeBlocks==true, and if the norm of "
01470        "any column within a block decreases by this much or "
01471        "more after orthogonalization, we reorthogonalize.");
01472 
01473       // This parameter corresponds to the "sing_tol_" parameter in
01474       // Belos' DGKSOrthoManager.  We choose the same default value.
01475       const magnitude_type defaultRelativeRankTolerance = 
01476   Teuchos::as<magnitude_type>(10) * SCTM::eps();
01477 
01478       // If the relative rank tolerance is zero, then we will always
01479       // declare blocks to be numerically full rank, as long as no
01480       // singular values are zero.
01481       params->set ("relativeRankTolerance", defaultRelativeRankTolerance,
01482        "Relative tolerance to determine the numerical rank of a "
01483        "block when normalizing.");
01484 
01485       // See Stewart's 2008 paper on block Gram-Schmidt for a definition
01486       // of "orthogonalization fault."
01487       const bool defaultThrowOnReorthogFault = true;
01488       params->set ("throwOnReorthogFault", defaultThrowOnReorthogFault,
01489        "Whether to throw an exception if an orthogonalization "
01490        "fault occurs.  This only matters if reorthogonalization "
01491        "is enabled (reorthogonalizeBlocks==true).");
01492 
01493       const bool defaultForceNonnegativeDiagonal = false;
01494       params->set ("forceNonnegativeDiagonal", defaultForceNonnegativeDiagonal,
01495        "Whether to force the R factor produced by the normalization "
01496        "step to have a nonnegative diagonal.");
01497 
01498       defaultParams_ = params;
01499     }
01500     return defaultParams_;
01501   }
01502 
01503   template<class Scalar, class MV>
01504   Teuchos::RCP<const Teuchos::ParameterList>
01505   TsqrOrthoManagerImpl<Scalar, MV>::getFastParameters ()
01506   {
01507     using Teuchos::ParameterList;
01508     using Teuchos::RCP;
01509     using Teuchos::rcp;
01510 
01511     RCP<const ParameterList> defaultParams = getValidParameters();
01512     // Start with a clone of the default parameters.
01513     RCP<ParameterList> params = rcp (new ParameterList (*defaultParams));
01514   
01515     // Disable reorthogonalization and randomization of the null
01516     // space basis.  Reorthogonalization tolerances don't matter,
01517     // since we aren't reorthogonalizing blocks in the fast
01518     // settings.  We can leave the default values.  Also,
01519     // (re)orthogonalization faults may only occur with
01520     // reorthogonalization, so we don't have to worry about the
01521     // "throwOnReorthogFault" setting.
01522     const bool randomizeNullSpace = false;
01523     params->set ("randomizeNullSpace", randomizeNullSpace);      
01524     const bool reorthogonalizeBlocks = false;
01525     params->set ("reorthogonalizeBlocks", reorthogonalizeBlocks);
01526 
01527     return params;
01528   }
01529 
01530   template<class Scalar, class MV>
01531   int
01532   TsqrOrthoManagerImpl<Scalar, MV>::
01533   rawNormalize (MV& X, 
01534     MV& Q, 
01535     Teuchos::SerialDenseMatrix<int, Scalar>& B)
01536   {
01537     int rank;
01538     try {
01539       // This call only computes the QR factorization X = Q B.
01540       // It doesn't compute the rank of X.  That comes from
01541       // revealRank() below.
01542       tsqrAdaptor_.factorExplicit (X, Q, B, forceNonnegativeDiagonal_);
01543       // This call will only modify *B if *B on input is not of full
01544       // numerical rank.
01545       rank = tsqrAdaptor_.revealRank (Q, B, relativeRankTolerance_);
01546     } catch (std::exception& e) {
01547       throw TsqrOrthoError (e.what()); // Toss the exception up the chain.
01548     }
01549     return rank;
01550   }
01551 
01552   template<class Scalar, class MV>
01553   int
01554   TsqrOrthoManagerImpl<Scalar, MV>::
01555   normalizeOne (MV& X, 
01556     Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B) const
01557   {
01558     // Make space for the normalization coefficient.  This will either
01559     // be a freshly allocated matrix (if B is null), or a view of the
01560     // 1x1 upper left submatrix of *B (if B is not null).
01561     mat_ptr B_out;
01562     if (B.is_null()) {
01563       B_out = rcp (new mat_type (1, 1));
01564     } else {
01565       const int numRows = B->numRows();
01566       const int numCols = B->numCols();
01567       TEUCHOS_TEST_FOR_EXCEPTION(numRows < 1 || numCols < 1, 
01568        std::invalid_argument,
01569        "normalizeOne: Input matrix B must be at "
01570        "least 1 x 1, but is instead " << numRows
01571        << " x " << numCols << ".");
01572       // Create a view of the 1x1 upper left submatrix of *B.
01573       B_out = rcp (new mat_type (Teuchos::View, *B, 1, 1));
01574     }
01575 
01576     // Compute the norm of X, and write the result to B_out.
01577     std::vector<magnitude_type> theNorm (1, SCTM::zero());
01578     MVT::MvNorm (X, theNorm);
01579     (*B_out)(0,0) = theNorm[0];
01580 
01581     if (B.is_null()) {
01582       // The input matrix B is null, so assign B_out to it.  If B was
01583       // not null on input, then B_out is a view of *B, so we don't
01584       // have to do anything here.  Note that SerialDenseMatrix uses
01585       // raw pointers to store data and represent views, so we have to
01586       // be careful about scope.
01587       B = B_out;
01588     }
01589 
01590     // Scale X by its norm, if its norm is zero.  Otherwise, do the
01591     // right thing based on whether the user wants us to fill the null
01592     // space with random vectors.
01593     if (theNorm[0] == SCTM::zero()) {
01594       // Make a view of the first column of Q, fill it with random
01595       // data, and normalize it.  Throw away the resulting norm.
01596       if (randomizeNullSpace_) {
01597   MVT::MvRandom(X);
01598   MVT::MvNorm (X, theNorm);
01599   if (theNorm[0] == SCTM::zero()) {
01600     // It is possible that a random vector could have all zero
01601     // entries, but unlikely.  We could try again, but it's also
01602     // possible that multiple tries could result in zero
01603     // vectors.  We choose instead to give up.
01604     throw TsqrOrthoError("normalizeOne: a supposedly random "
01605              "vector has norm zero!");
01606   } else {
01607     // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a
01608     // Scalar by a magnitude_type is defined and that it results
01609     // in a Scalar.
01610     const Scalar alpha = SCT::one() / theNorm[0];
01611     MVT::MvScale (X, alpha);
01612   }
01613       }
01614       return 0; // The rank of the matrix (actually just one vector) X.
01615     } else {
01616       // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a Scalar by
01617       // a magnitude_type is defined and that it results in a Scalar.
01618       const Scalar alpha = SCT::one() / theNorm[0];
01619       MVT::MvScale (X, alpha);
01620       return 1; // The rank of the matrix (actually just one vector) X.
01621     }
01622   }
01623 
01624 
01625   template<class Scalar, class MV>
01626   void
01627   TsqrOrthoManagerImpl<Scalar, MV>::
01628   rawProject (MV& X, 
01629         Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01630         Teuchos::ArrayView<Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > > C) const
01631   {
01632 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01633     Teuchos::TimeMonitor timerMonitorNormalize(*timerProject_);
01634 #endif // BELOS_TEUCHOS_TIME_MONITOR
01635 
01636     // "Modified Gram-Schmidt" version of Block Gram-Schmidt.
01637     const int num_Q_blocks = Q.size();
01638     for (int i = 0; i < num_Q_blocks; ++i)
01639       {
01640   // TEUCHOS_TEST_FOR_EXCEPTION(C[i].is_null(), std::logic_error,
01641   //       "TsqrOrthoManagerImpl::rawProject(): C[" 
01642   //       << i << "] is null");
01643   // TEUCHOS_TEST_FOR_EXCEPTION(Q[i].is_null(), std::logic_error,
01644   //       "TsqrOrthoManagerImpl::rawProject(): Q[" 
01645   //       << i << "] is null");
01646   mat_type& Ci = *C[i];
01647   const MV& Qi = *Q[i];
01648   innerProd (Qi, X, Ci);
01649   MVT::MvTimesMatAddMv (-SCT::one(), Qi, Ci, SCT::one(), X);
01650       }
01651   }
01652 
01653 
01654   template<class Scalar, class MV>
01655   void
01656   TsqrOrthoManagerImpl<Scalar, MV>::
01657   rawProject (MV& X, 
01658         const Teuchos::RCP<const MV>& Q, 
01659         const Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> >& C) const
01660   {
01661 #ifdef BELOS_TEUCHOS_TIME_MONITOR
01662     Teuchos::TimeMonitor timerMonitorNormalize(*timerProject_);
01663 #endif // BELOS_TEUCHOS_TIME_MONITOR
01664 
01665     // Block Gram-Schmidt
01666     innerProd (*Q, X, *C);
01667     MVT::MvTimesMatAddMv (-SCT::one(), *Q, *C, SCT::one(), X);
01668   }
01669 
01670   template<class Scalar, class MV>
01671   int
01672   TsqrOrthoManagerImpl<Scalar, MV>::
01673   normalizeImpl (MV& X, 
01674      MV& Q, 
01675      Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B, 
01676      const bool outOfPlace)
01677   {
01678     using Teuchos::Range1D;
01679     using Teuchos::RCP;
01680     using Teuchos::rcp;
01681     using Teuchos::ScalarTraits;
01682     using Teuchos::tuple;
01683     using std::cerr;
01684     using std::endl;
01685     // Don't set this to true unless you want lots of debugging
01686     // messages written to stderr on every MPI process.
01687     const bool extraDebug = false;
01688 
01689     const int numCols = MVT::GetNumberVecs (X);
01690     if (numCols == 0) {
01691       return 0; // Fast exit for an empty input matrix.
01692     }
01693 
01694     // We allow Q to have more columns than X.  In that case, we only
01695     // touch the first numCols columns of Q.
01696     TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(Q) < numCols, 
01697            std::invalid_argument, 
01698            "TsqrOrthoManagerImpl::normalizeImpl(X,Q,B): Q has "
01699            << MVT::GetNumberVecs(Q) << " columns.  This is too "
01700            "few, since X has " << numCols << " columns.");
01701     // TSQR wants a Q with the same number of columns as X, so have it
01702     // work on a nonconstant view of Q with the same number of columns
01703     // as X.
01704     RCP<MV> Q_view = MVT::CloneViewNonConst (Q, Range1D(0, numCols-1));
01705 
01706     // Make space for the normalization coefficients.  This will
01707     // either be a freshly allocated matrix (if B is null), or a view
01708     // of the appropriately sized upper left submatrix of *B (if B is
01709     // not null).
01710     mat_ptr B_out;
01711     if (B.is_null()) {
01712       B_out = rcp (new mat_type (numCols, numCols));
01713     } else {
01714       // Make sure that B is no smaller than numCols x numCols.
01715       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < numCols || B->numCols() < numCols,
01716        std::invalid_argument,
01717        "normalizeOne: Input matrix B must be at "
01718        "least " << numCols << " x " << numCols 
01719        << ", but is instead " << B->numRows()
01720        << " x " << B->numCols() << ".");
01721       // Create a view of the numCols x numCols upper left submatrix
01722       // of *B.  TSQR will write the normalization coefficients there.
01723       B_out = rcp (new mat_type (Teuchos::View, *B, numCols, numCols));
01724     }
01725 
01726     if (extraDebug) {
01727       std::vector<magnitude_type> norms (numCols);
01728       MVT::MvNorm (X, norms);
01729       cerr << "Column norms of X before orthogonalization: ";
01730       typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01731       for (iter_type iter = norms.begin(); iter != norms.end(); ++iter) {
01732   cerr << *iter;
01733   if (iter+1 != norms.end())
01734     cerr << ", ";
01735       }
01736     }
01737 
01738     // Compute rank-revealing decomposition (in this case, TSQR of X
01739     // followed by SVD of the R factor and appropriate updating of the
01740     // resulting Q factor) of X.  X is modified in place and filled
01741     // with garbage, and Q_view contains the resulting explicit Q
01742     // factor.  Later, we will copy this back into X.
01743     //
01744     // The matrix *B_out will only be upper triangular if X is of full
01745     // numerical rank.  Otherwise, the entries below the diagonal may
01746     // be filled in as well.
01747     const int rank = rawNormalize (X, *Q_view, *B_out);
01748     if (B.is_null()) {
01749       // The input matrix B is null, so assign B_out to it.  If B was
01750       // not null on input, then B_out is a view of *B, so we don't
01751       // have to do anything here.  Note that SerialDenseMatrix uses
01752       // raw pointers to store data and represent views, so we have to
01753       // be careful about scope.
01754       B = B_out;
01755     }
01756 
01757     if (extraDebug) {
01758       std::vector<magnitude_type> norms (numCols);
01759       MVT::MvNorm (*Q_view, norms);
01760       cerr << "Column norms of Q_view after orthogonalization: ";
01761       for (typename std::vector<magnitude_type>::const_iterator iter = norms.begin(); 
01762      iter != norms.end(); ++iter) {
01763   cerr << *iter;
01764   if (iter+1 != norms.end())
01765     cerr << ", ";
01766       }
01767     }
01768     TEUCHOS_TEST_FOR_EXCEPTION(rank < 0 || rank > numCols, std::logic_error,
01769            "Belos::TsqrOrthoManagerImpl::normalizeImpl: "
01770            "rawNormalize() returned rank = " << rank << " for a "
01771            "matrix X with " << numCols << " columns.  Please report"
01772            " this bug to the Belos developers.");
01773     if (extraDebug && rank == 0) {
01774       // Sanity check: ensure that the columns of X are sufficiently
01775       // small for X to be reported as rank zero.
01776       const mat_type& B_ref = *B;
01777       std::vector<magnitude_type> norms (B_ref.numCols());
01778       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01779   typedef typename mat_type::scalarType mat_scalar_type;
01780   mat_scalar_type sumOfSquares = ScalarTraits<mat_scalar_type>::zero();
01781   for (typename mat_type::ordinalType i = 0; i <= j; ++i) {
01782     const mat_scalar_type B_ij = 
01783       ScalarTraits<mat_scalar_type>::magnitude (B_ref(i,j));
01784     sumOfSquares += B_ij*B_ij;
01785   }
01786   norms[j] = ScalarTraits<mat_scalar_type>::squareroot (sumOfSquares);
01787       }
01788       bool anyNonzero = false;
01789       typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01790       for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01791   if (*it > relativeRankTolerance_) {
01792     anyNonzero = true;
01793   }
01794       }
01795       using std::cerr;
01796       using std::endl;
01797       cerr << "Norms of columns of B after orthogonalization: ";
01798       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01799   cerr << norms[j];
01800   if (j != B_ref.numCols() - 1)
01801     cerr << ", ";
01802       }
01803       cerr << endl;
01804     }
01805 
01806     // If X is full rank or we don't want to replace its null space
01807     // basis with random vectors, then we're done.
01808     if (rank == numCols || ! randomizeNullSpace_) {
01809       // If we're supposed to be working in place in X, copy the
01810       // results back from Q_view into X.
01811       if (! outOfPlace) {
01812   MVT::Assign (*Q_view, X);
01813       }
01814       return rank;
01815     }
01816 
01817     if (randomizeNullSpace_ && rank < numCols) {
01818       // X wasn't full rank.  Replace the null space basis of X (in
01819       // the last numCols-rank columns of Q_view) with random data,
01820       // project it against the first rank columns of Q_view, and
01821       // normalize.
01822       //
01823       // Number of columns to fill with random data.
01824       const int nullSpaceNumCols = numCols - rank;
01825       // Inclusive range of indices of columns of X to fill with
01826       // random data.
01827       Range1D nullSpaceIndices (rank, numCols-1);
01828 
01829       // rawNormalize() wrote the null space basis vectors into
01830       // Q_view.  We continue to work in place in Q_view by writing
01831       // the random data there and (if there is a nontrival column
01832       // space) projecting in place against the column space basis
01833       // vectors (also in Q_view).
01834       RCP<MV> Q_null = MVT::CloneViewNonConst (*Q_view, nullSpaceIndices);
01835       // Replace the null space basis with random data.
01836       MVT::MvRandom (*Q_null); 
01837 
01838       // Make sure that the "random" data isn't all zeros.  This is
01839       // statistically nearly impossible, but we test for debugging
01840       // purposes.
01841       {
01842   std::vector<magnitude_type> norms (MVT::GetNumberVecs(*Q_null));
01843   MVT::MvNorm (*Q_null, norms);
01844 
01845   bool anyZero = false;
01846   typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01847   for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01848     if (*it == SCTM::zero()) {
01849       anyZero = true;
01850     }
01851   }
01852   if (anyZero) {
01853     std::ostringstream os;
01854     os << "TsqrOrthoManagerImpl::normalizeImpl: "
01855       "We are being asked to randomize the null space, for a matrix "
01856       "with " << numCols << " columns and reported column rank "
01857        << rank << ".  The inclusive range of columns to fill with "
01858       "random data is [" << nullSpaceIndices.lbound() << "," 
01859        << nullSpaceIndices.ubound() << "].  After filling the null "
01860       "space vectors with random numbers, at least one of the vectors"
01861       " has norm zero.  Here are the norms of all the null space "
01862       "vectors: [";
01863     for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01864       os << *it;
01865       if (it+1 != norms.end())
01866         os << ", ";
01867     }
01868     os << "].)  There is a tiny probability that this could happen "
01869       "randomly, but it is likely a bug.  Please report it to the "
01870       "Belos developers, especially if you are able to reproduce the "
01871       "behavior.";
01872     TEUCHOS_TEST_FOR_EXCEPTION(anyZero, TsqrOrthoError, os.str());
01873   }
01874       }
01875 
01876       if (rank > 0) {
01877   // Project the random data against the column space basis of
01878   // X, using a simple block projection ("Block Classical
01879   // Gram-Schmidt").  This is accurate because we've already
01880   // orthogonalized the column space basis of X nearly to
01881   // machine precision via a QR factorization (TSQR) with
01882   // accuracy comparable to Householder QR.
01883   RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01884 
01885   // Temporary storage for projection coefficients.  We don't
01886   // need to keep them, since they represent the null space
01887   // basis (for which the coefficients are logically zero).
01888   mat_ptr C_null (new mat_type (rank, nullSpaceNumCols));
01889   rawProject (*Q_null, Q_col, C_null);
01890       }
01891       // Normalize the projected random vectors, so that they are
01892       // mutually orthogonal (as well as orthogonal to the column
01893       // space basis of X).  We use X for the output of the
01894       // normalization: for out-of-place normalization (outOfPlace ==
01895       // true), X is overwritten with "invalid values" anyway, and for
01896       // in-place normalization (outOfPlace == false), we want the
01897       // result to be in X anyway.
01898       RCP<MV> X_null = MVT::CloneViewNonConst (X, nullSpaceIndices);
01899       // Normalization coefficients for projected random vectors.
01900       // Will be thrown away.
01901       mat_type B_null (nullSpaceNumCols, nullSpaceNumCols);
01902       // Write the normalized vectors to X_null (in X).
01903       const int nullSpaceBasisRank = rawNormalize (*Q_null, *X_null, B_null);
01904     
01905       // It's possible, but unlikely, that X_null doesn't have full
01906       // rank (after the projection step).  We could recursively fill
01907       // in more random vectors until we finally get a full rank
01908       // matrix, but instead we just throw an exception.
01909       //
01910       // NOTE (mfh 08 Nov 2010) Perhaps we should deal with this case
01911       // more elegantly.  Recursion might be one way to solve it, but
01912       // be sure to check that the recursion will terminate.  We could
01913       // do this by supplying an additional argument to rawNormalize,
01914       // which is the null space basis rank from the previous
01915       // iteration.  The rank has to decrease each time, or the
01916       // recursion may go on forever.
01917       if (nullSpaceBasisRank < nullSpaceNumCols) {
01918   std::vector<magnitude_type> norms (MVT::GetNumberVecs(*X_null));
01919   MVT::MvNorm (*X_null, norms);
01920   std::ostringstream os;
01921   os << "TsqrOrthoManagerImpl::normalizeImpl: "
01922      << "We are being asked to randomize the null space, "
01923      << "for a matrix with " << numCols << " columns and "
01924      << "column rank " << rank << ".  After projecting and "
01925      << "normalizing the generated random vectors, they "
01926      << "only have rank " << nullSpaceBasisRank << ".  They"
01927      << " should have full rank " << nullSpaceNumCols 
01928      << ".  (The inclusive range of columns to fill with "
01929      << "random data is [" << nullSpaceIndices.lbound() 
01930      << "," << nullSpaceIndices.ubound() << "].  The "
01931      << "column norms of the resulting Q factor are: [";
01932   for (typename std::vector<magnitude_type>::size_type k = 0; 
01933        k < norms.size(); ++k) {
01934     os << norms[k];
01935     if (k != norms.size()-1) {
01936       os << ", ";
01937     }
01938   }
01939   os << "].)  There is a tiny probability that this could "
01940      << "happen randomly, but it is likely a bug.  Please "
01941      << "report it to the Belos developers, especially if "
01942      << "you are able to reproduce the behavior.";
01943 
01944   TEUCHOS_TEST_FOR_EXCEPTION(nullSpaceBasisRank < nullSpaceNumCols, 
01945          TsqrOrthoError, os.str());
01946       }
01947       // If we're normalizing out of place, copy the X_null
01948       // vectors back into Q_null; the Q_col vectors are already
01949       // where they are supposed to be in that case.
01950       //
01951       // If we're normalizing in place, leave X_null alone (it's
01952       // already where it needs to be, in X), but copy Q_col back
01953       // into the first rank columns of X.
01954       if (outOfPlace) {
01955   MVT::Assign (*X_null, *Q_null);
01956       } else if (rank > 0) {
01957   // MVT::Assign() doesn't accept empty ranges of columns.
01958   RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01959   RCP<MV> X_col = MVT::CloneViewNonConst (X, Range1D(0, rank-1));
01960   MVT::Assign (*Q_col, *X_col);
01961       }
01962     }
01963     return rank;
01964   }
01965 
01966 
01967   template<class Scalar, class MV>
01968   void
01969   TsqrOrthoManagerImpl<Scalar, MV>::
01970   checkProjectionDims (int& ncols_X, 
01971            int& num_Q_blocks,
01972            int& ncols_Q_total,
01973            const MV& X, 
01974            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const
01975   {
01976     // First assign to temporary values, so the function won't
01977     // commit any externally visible side effects unless it will
01978     // return normally (without throwing an exception).  (I'm being
01979     // cautious; MVT::GetNumberVecs() probably shouldn't have any
01980     // externally visible side effects, unless it is logging to a
01981     // file or something.)
01982     int the_ncols_X, the_num_Q_blocks, the_ncols_Q_total;
01983     the_num_Q_blocks = Q.size();
01984     the_ncols_X = MVT::GetNumberVecs (X);
01985 
01986     // Compute the total number of columns of all the Q[i] blocks.
01987     the_ncols_Q_total = 0;
01988     // You should be angry if your compiler doesn't support type
01989     // inference ("auto").  That's why I need this awful typedef.
01990     using Teuchos::ArrayView;
01991     using Teuchos::RCP;
01992     typedef typename ArrayView<RCP<const MV> >::const_iterator iter_type;
01993     for (iter_type it = Q.begin(); it != Q.end(); ++it) {
01994       const MV& Qi = **it;
01995       the_ncols_Q_total += MVT::GetNumberVecs (Qi);
01996     }
01997 
01998     // Commit temporary values to the output arguments.
01999     ncols_X = the_ncols_X;
02000     num_Q_blocks = the_num_Q_blocks;
02001     ncols_Q_total = the_ncols_Q_total;
02002   }
02003 
02004 } // namespace Belos
02005 
02006 #endif // __BelosTsqrOrthoManagerImpl_hpp
02007 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines