Anasazi Version of the Day
AnasaziTsqrOrthoManagerImpl.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00032 #ifndef __AnasaziTsqrOrthoManagerImpl_hpp
00033 #define __AnasaziTsqrOrthoManagerImpl_hpp
00034 
00035 #include "AnasaziConfigDefs.hpp"
00036 #include "AnasaziMultiVecTraits.hpp"
00037 #include "AnasaziOrthoManager.hpp" // OrthoError, etc.
00038 
00039 #include "Teuchos_as.hpp"
00040 #include "Teuchos_LAPACK.hpp"
00041 #include "Teuchos_ParameterList.hpp"
00042 #include "Teuchos_ParameterListAcceptorDefaultBase.hpp"
00043 #include <algorithm>
00044 
00045 
00046 namespace Anasazi {
00047 
00051   class TsqrOrthoError : public OrthoError {
00052   public: 
00053     TsqrOrthoError (const std::string& what_arg) : 
00054       OrthoError (what_arg) {}
00055   };
00056 
00076   class TsqrOrthoFault : public OrthoError {
00077   public: 
00078     TsqrOrthoFault (const std::string& what_arg) : 
00079       OrthoError (what_arg) {}
00080   };
00081 
00114   template<class Scalar, class MV>
00115   class TsqrOrthoManagerImpl :
00116     public Teuchos::ParameterListAcceptorDefaultBase {
00117   public:
00118     typedef Scalar scalar_type;
00119     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00120     typedef MV multivector_type;
00123     typedef Teuchos::SerialDenseMatrix<int, Scalar> mat_type;
00124     typedef Teuchos::RCP<mat_type> mat_ptr;
00125 
00126   private:
00127     typedef Teuchos::ScalarTraits<Scalar> SCT;
00128     typedef Teuchos::ScalarTraits<magnitude_type> SCTM;
00129     typedef MultiVecTraits<Scalar, MV> MVT;
00130     typedef typename MVT::tsqr_adaptor_type tsqr_adaptor_type;
00131 
00132   public:
00140     Teuchos::RCP<const Teuchos::ParameterList> getValidParameters () const;
00141 
00143     void setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params);
00144 
00155     Teuchos::RCP<const Teuchos::ParameterList> getFastParameters ();
00156 
00173     TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00174         const std::string& label);
00175 
00180     TsqrOrthoManagerImpl (const std::string& label);
00181 
00189     void setLabel (const std::string& label) { 
00190       if (label != label_) {
00191   label_ = label; 
00192       }
00193     }
00194 
00196     const std::string& getLabel () const { return label_; }
00197 
00206     void 
00207     innerProd (const MV& X, const MV& Y, mat_type& Z) const
00208     {
00209       MVT::MvTransMv (SCT::one(), X, Y, Z);
00210     }
00211 
00229     void
00230     norm (const MV& X, std::vector<magnitude_type>& normvec) const;
00231 
00241     void 
00242     project (MV& X, 
00243        Teuchos::Array<mat_ptr> C, 
00244        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00245 
00259     int normalize (MV& X, mat_ptr B);
00260 
00279     int 
00280     normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B);
00281 
00295     int 
00296     projectAndNormalize (MV &X,
00297        Teuchos::Array<mat_ptr> C,
00298        mat_ptr B,
00299        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00300     {
00301       // "false" means we work on X in place.  The second argument is
00302       // not read or written in that case.
00303       return projectAndNormalizeImpl (X, X, false, C, B, Q);
00304     }
00305 
00325     int 
00326     projectAndNormalizeOutOfPlace (MV& X_in, 
00327            MV& X_out,
00328            Teuchos::Array<mat_ptr> C,
00329            mat_ptr B,
00330            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00331     {
00332       // "true" means we work on X_in out of place, writing the
00333       // results into X_out.
00334       return projectAndNormalizeImpl (X_in, X_out, true, C, B, Q);
00335     }
00336 
00341     magnitude_type
00342     orthonormError (const MV &X) const
00343     {
00344       const Scalar ONE = SCT::one();
00345       const int ncols = MVT::GetNumberVecs(X);
00346       mat_type XTX (ncols, ncols);
00347       innerProd (X, X, XTX);
00348       for (int k = 0; k < ncols; ++k) {
00349   XTX(k,k) -= ONE;
00350       }
00351       return XTX.normFrobenius();
00352     }
00353 
00355     magnitude_type 
00356     orthogError (const MV &X1, 
00357      const MV &X2) const
00358     {
00359       const int ncols_X1 = MVT::GetNumberVecs (X1);
00360       const int ncols_X2 = MVT::GetNumberVecs (X2);
00361       mat_type X1_T_X2 (ncols_X1, ncols_X2);
00362       innerProd (X1, X2, X1_T_X2);
00363       return X1_T_X2.normFrobenius();
00364     }
00365 
00369     magnitude_type blockReorthogThreshold() const { return blockReorthogThreshold_; }
00370 
00373     magnitude_type relativeRankTolerance() const { return relativeRankTolerance_; }
00374 
00375   private:
00377     Teuchos::RCP<Teuchos::ParameterList> params_;
00378 
00380     mutable Teuchos::RCP<const Teuchos::ParameterList> defaultParams_;
00381 
00383     std::string label_;
00384 
00386     tsqr_adaptor_type tsqrAdaptor_;
00387 
00397     Teuchos::RCP<MV> Q_;
00398 
00400     magnitude_type eps_;
00401 
00402 
00406     bool randomizeNullSpace_;
00407 
00413     bool reorthogonalizeBlocks_;
00414 
00418     bool throwOnReorthogFault_;
00419 
00421     magnitude_type blockReorthogThreshold_;
00422 
00424     magnitude_type relativeRankTolerance_;
00425 
00432     bool forceNonnegativeDiagonal_;
00433 
00435     void
00436     raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
00437       const std::vector<magnitude_type>& normsAfterSecondPass,
00438       const std::vector<int>& faultIndices);
00439 
00449     void
00450     checkProjectionDims (int& ncols_X, 
00451        int& num_Q_blocks,
00452        int& ncols_Q_total,
00453        const MV& X, 
00454        Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const;
00455 
00466     void
00467     allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
00468             Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00469             const MV& X,
00470             const bool attemptToRecycle = true) const;
00471 
00480     int 
00481     projectAndNormalizeImpl (MV& X_in, 
00482            MV& X_out,
00483            const bool outOfPlace,
00484            Teuchos::Array<mat_ptr> C,
00485            mat_ptr B,
00486            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q);
00487 
00493     void
00494     rawProject (MV& X, 
00495     Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00496     Teuchos::ArrayView<mat_ptr> C) const;
00497 
00499     void
00500     rawProject (MV& X, 
00501     const Teuchos::RCP<const MV>& Q, 
00502     const mat_ptr& C) const;
00503 
00530     int rawNormalize (MV& X, MV& Q, mat_type& B);
00531 
00549     int normalizeOne (MV& X, mat_ptr B) const;
00550 
00578     int normalizeImpl (MV& X, MV& Q, mat_ptr B, const bool outOfPlace);
00579   };
00580 
00581   template<class Scalar, class MV>
00582   void
00583   TsqrOrthoManagerImpl<Scalar, MV>::
00584   setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
00585   {
00586     using Teuchos::ParameterList;
00587     using Teuchos::parameterList;
00588     using Teuchos::RCP;
00589     using Teuchos::sublist;
00590     typedef magnitude_type M; // abbreviation.
00591 
00592     RCP<const ParameterList> defaultParams = getValidParameters ();
00593     // Sublist of TSQR implementation parameters; to get below.
00594     RCP<ParameterList> tsqrParams;
00595 
00596     RCP<ParameterList> theParams;
00597     if (params.is_null()) {
00598       theParams = parameterList (*defaultParams);
00599     } else {
00600       theParams = params;
00601 
00602       // Don't call validateParametersAndSetDefaults(); we prefer to
00603       // ignore parameters that we don't recognize, at least for now.
00604       // However, we do fill in missing parameters with defaults.
00605 
00606       randomizeNullSpace_ = 
00607   theParams->get<bool> ("randomizeNullSpace", 
00608             defaultParams->get<bool> ("randomizeNullSpace"));
00609       reorthogonalizeBlocks_ = 
00610   theParams->get<bool> ("reorthogonalizeBlocks", 
00611             defaultParams->get<bool> ("reorthogonalizeBlocks"));
00612       throwOnReorthogFault_ = 
00613   theParams->get<bool> ("throwOnReorthogFault", 
00614             defaultParams->get<bool> ("throwOnReorthogFault"));
00615       blockReorthogThreshold_ = 
00616   theParams->get<M> ("blockReorthogThreshold",
00617          defaultParams->get<M> ("blockReorthogThreshold"));
00618       relativeRankTolerance_ = 
00619   theParams->get<M> ("relativeRankTolerance", 
00620          defaultParams->get<M> ("relativeRankTolerance"));
00621       forceNonnegativeDiagonal_ = 
00622   theParams->get<bool> ("forceNonnegativeDiagonal", 
00623             defaultParams->get<bool> ("forceNonnegativeDiagonal"));
00624 
00625       // Get the sublist of TSQR implementation parameters.  Use the
00626       // default sublist if one isn't provided.
00627       if (! theParams->isSublist ("TSQR implementation")) {
00628   theParams->set ("TSQR implementation", 
00629       defaultParams->sublist ("TSQR implementation"));
00630       }
00631       tsqrParams = sublist (theParams, "TSQR implementation", true);
00632     }
00633 
00634     // Send the TSQR implementation parameters to the TSQR adaptor.
00635     tsqrAdaptor_.setParameterList (tsqrParams);
00636 
00637     // Save the input parameter list.
00638     setMyParamList (theParams);
00639   }
00640 
00641   template<class Scalar, class MV>
00642   TsqrOrthoManagerImpl<Scalar, MV>::
00643   TsqrOrthoManagerImpl (const Teuchos::RCP<Teuchos::ParameterList>& params,
00644       const std::string& label) :
00645     label_ (label),
00646     Q_ (Teuchos::null),               // Initialized on demand
00647     eps_ (SCTM::eps()),               // Machine precision
00648     randomizeNullSpace_ (true),
00649     reorthogonalizeBlocks_ (true),
00650     throwOnReorthogFault_ (false),
00651     blockReorthogThreshold_ (0),
00652     relativeRankTolerance_ (0),
00653     forceNonnegativeDiagonal_ (false)
00654   {
00655     setParameterList (params); // This also sets tsqrAdaptor_'s parameters.
00656   }
00657 
00658   template<class Scalar, class MV>
00659   TsqrOrthoManagerImpl<Scalar, MV>::
00660   TsqrOrthoManagerImpl (const std::string& label) :
00661     label_ (label),
00662     Q_ (Teuchos::null),               // Initialized on demand
00663     eps_ (SCTM::eps()),               // Machine precision
00664     randomizeNullSpace_ (true),
00665     reorthogonalizeBlocks_ (true),
00666     throwOnReorthogFault_ (false),
00667     blockReorthogThreshold_ (0),
00668     relativeRankTolerance_ (0), 
00669     forceNonnegativeDiagonal_ (false) 
00670   {
00671     setParameterList (Teuchos::null); // Set default parameters.
00672   }
00673 
00674   template<class Scalar, class MV>
00675   void
00676   TsqrOrthoManagerImpl<Scalar, MV>::
00677   norm (const MV& X, std::vector<magnitude_type>& normVec) const
00678   {
00679     const int numCols = MVT::GetNumberVecs (X);
00680     // std::vector<T>::size_type is unsigned; int is signed.  Mixed
00681     // unsigned/signed comparisons trigger compiler warnings.
00682     if (normVec.size() < static_cast<size_t>(numCols))
00683       normVec.resize (numCols); // Resize normvec if necessary.
00684     MVT::MvNorm (X, normVec);
00685   }
00686 
00687   template<class Scalar, class MV>
00688   void
00689   TsqrOrthoManagerImpl<Scalar, MV>::project (MV& X, 
00690                Teuchos::Array<mat_ptr> C,
00691                Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00692   {
00693     int ncols_X, num_Q_blocks, ncols_Q_total;
00694     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X, Q);
00695     // Test for quick exit: any dimension of X is zero, or there are
00696     // zero Q blocks, or the total number of columns of the Q blocks
00697     // is zero.
00698     if (ncols_X == 0 || num_Q_blocks == 0 || ncols_Q_total == 0)
00699       return;
00700 
00701     // Make space for first-pass projection coefficients
00702     allocateProjectionCoefficients (C, Q, X, true);
00703 
00704     // We only use columnNormsBefore and compute pre-projection column
00705     // norms if doing block reorthogonalization.
00706     std::vector<magnitude_type> columnNormsBefore (ncols_X, magnitude_type(0));
00707     if (reorthogonalizeBlocks_)
00708       MVT::MvNorm (X, columnNormsBefore);
00709 
00710     // Project (first block orthogonalization step): 
00711     // C := Q^* X, X := X - Q C.
00712     rawProject (X, Q, C); 
00713 
00714     // If we are doing block reorthogonalization, reorthogonalize X if
00715     // necessary.
00716     if (reorthogonalizeBlocks_)
00717       {
00718   std::vector<magnitude_type> columnNormsAfter (ncols_X, magnitude_type(0));
00719   MVT::MvNorm (X, columnNormsAfter);
00720   
00721   // Relative block reorthogonalization threshold
00722   const magnitude_type relThres = blockReorthogThreshold();
00723   // Reorthogonalize X if any of its column norms decreased by a
00724   // factor more than the block reorthogonalization threshold.
00725   // Don't bother trying to subset the columns; that will make
00726   // the columns noncontiguous and thus hinder BLAS 3
00727   // optimizations.
00728   bool reorthogonalize = false;
00729   for (int j = 0; j < ncols_X; ++j)
00730     if (columnNormsAfter[j] < relThres * columnNormsBefore[j])
00731       {
00732         reorthogonalize = true;
00733         break;
00734       }
00735   if (reorthogonalize)
00736     {
00737       // Second-pass projection coefficients
00738       Teuchos::Array<mat_ptr> C2;
00739       allocateProjectionCoefficients (C2, Q, X, false);
00740 
00741       // Perform the second projection pass:
00742       // C2 = Q' X, X = X - Q*C2
00743       rawProject (X, Q, C2);
00744       // Update the projection coefficients
00745       for (int k = 0; k < num_Q_blocks; ++k)
00746         *C[k] += *C2[k];
00747     }
00748       }
00749   }
00750 
00751 
00752 
00753   template<class Scalar, class MV>
00754   int 
00755   TsqrOrthoManagerImpl<Scalar, MV>::normalize (MV& X, mat_ptr B)
00756   {
00757     using Teuchos::Range1D;
00758     using Teuchos::RCP;
00759 
00760     // MVT returns int for this, even though the "local ordinal
00761     // type" of the MV may be some other type (for example,
00762     // Tpetra::MultiVector<double, int32_t, int64_t, ...>).
00763     const int numCols = MVT::GetNumberVecs (X);
00764 
00765     // This special case (for X having only one column) makes
00766     // TsqrOrthoManagerImpl equivalent to Modified Gram-Schmidt
00767     // orthogonalization with conditional full reorthogonalization,
00768     // if all multivector inputs have only one column.  It also
00769     // avoids allocating Q_ scratch space and copying data when we
00770     // don't need to invoke TSQR at all.
00771     if (numCols == 1) {
00772       return normalizeOne (X, B);
00773     }
00774 
00775     // We use Q_ as scratch space for the normalization, since TSQR
00776     // requires a scratch multivector (it can't factor in place).  Q_
00777     // should come from a vector space compatible with X's vector
00778     // space, and Q_ should have at least as many columns as X.
00779     // Otherwise, we have to reallocate.  We also have to allocate
00780     // (not "re-") Q_ if we haven't allocated it before.  (We can't
00781     // allocate Q_ until we have some X, so we need a multivector as
00782     // the "prototype.")
00783     //
00784     // NOTE (mfh 11 Jan 2011) We only increase the number of columsn
00785     // in Q_, never decrease.  This is OK for typical uses of TSQR,
00786     // but you might prefer different behavior in some cases.
00787     //
00788     // NOTE (mfh 10 Mar 2011) We should only reuse the scratch space
00789     // Q_ if X and Q_ have compatible data distributions.  However,
00790     // Belos' current MultiVecTraits interface does not let us check
00791     // for this.  Thus, we can only check whether X and Q_ have the
00792     // same number of rows.  This will behave correctly for the common
00793     // case in Belos that all multivectors with the same number of
00794     // rows have the same data distribution.
00795     //
00796     // The specific MV implementation may do more checks than this on
00797     // its own and throw an exception if X and Q_ are not compatible,
00798     // but it may not.  If you find that recycling the Q_ space causes
00799     // troubles, you may consider modifying the code below to
00800     // reallocate Q_ for every X that comes in.  
00801     if (Q_.is_null() || 
00802   MVT::GetVecLength(*Q_) != MVT::GetVecLength(X) ||
00803   numCols > MVT::GetNumberVecs (*Q_)) {
00804       Q_ = MVT::Clone (X, numCols);
00805     }
00806 
00807     // normalizeImpl() wants the second MV argument to have the same
00808     // number of columns as X.  To ensure this, we pass it a view of
00809     // Q_ if Q_ has more columns than X.  (This is possible if we
00810     // previously called normalize() with a different multivector,
00811     // since we never reallocate Q_ if it has more columns than
00812     // necessary.)
00813     if (MVT::GetNumberVecs(*Q_) == numCols) {
00814       return normalizeImpl (X, *Q_, B, false);
00815     } else {
00816       RCP<MV> Q_view = MVT::CloneViewNonConst (*Q_, Range1D(0, numCols-1));
00817       return normalizeImpl (X, *Q_view, B, false);
00818     }
00819   }
00820 
00821   template<class Scalar, class MV>
00822   void
00823   TsqrOrthoManagerImpl<Scalar, MV>::
00824   allocateProjectionCoefficients (Teuchos::Array<mat_ptr>& C, 
00825           Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
00826           const MV& X,
00827           const bool attemptToRecycle) const
00828   {
00829     const int num_Q_blocks = Q.size();
00830     const int ncols_X = MVT::GetNumberVecs (X);
00831     C.resize (num_Q_blocks);
00832     if (attemptToRecycle)
00833       {
00834   for (int i = 0; i < num_Q_blocks; ++i) 
00835     {
00836       const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
00837       // Create a new C[i] if necessary, otherwise resize if
00838       // necessary, otherwise fill with zeros.
00839       if (C[i].is_null())
00840         C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
00841       else 
00842         {
00843     mat_type& Ci = *C[i];
00844     if (Ci.numRows() != ncols_Qi || Ci.numCols() != ncols_X)
00845       Ci.shape (ncols_Qi, ncols_X);
00846     else
00847       Ci.putScalar (SCT::zero());
00848         }
00849     }
00850       }
00851     else
00852       {
00853   for (int i = 0; i < num_Q_blocks; ++i) 
00854     {
00855       const int ncols_Qi = MVT::GetNumberVecs (*Q[i]);
00856       C[i] = rcp (new mat_type (ncols_Qi, ncols_X));
00857     }
00858       }
00859   }
00860 
00861   template<class Scalar, class MV>
00862   int 
00863   TsqrOrthoManagerImpl<Scalar, MV>::
00864   normalizeOutOfPlace (MV& X, MV& Q, mat_ptr B)
00865   {
00866     const int numVecs = MVT::GetNumberVecs(X);
00867     if (numVecs == 0) {
00868       return 0; // Nothing to do.
00869     } else if (numVecs == 1) {
00870       // Special case for a single column; scale and copy over.
00871       using Teuchos::Range1D;
00872       using Teuchos::RCP;
00873       using Teuchos::rcp;
00874 
00875       // Normalize X in place (faster than TSQR for one column).
00876       const int rank = normalizeOne (X, B);
00877       // Copy results to first column of Q.
00878       RCP<MV> Q_0 = MVT::CloneViewNonConst (Q, Range1D(0,0));
00879       MVT::Assign (X, *Q_0);
00880       return rank;
00881     } else {
00882       // The "true" argument to normalizeImpl() means the output
00883       // vectors go into Q, and the contents of X are overwritten with
00884       // invalid values.
00885       return normalizeImpl (X, Q, B, true);
00886     }
00887   }
00888 
00889   template<class Scalar, class MV>
00890   int 
00891   TsqrOrthoManagerImpl<Scalar, MV>::
00892   projectAndNormalizeImpl (MV& X_in, 
00893          MV& X_out, // Only written if outOfPlace==false.
00894          const bool outOfPlace,
00895          Teuchos::Array<mat_ptr> C,
00896          mat_ptr B,
00897          Teuchos::ArrayView<Teuchos::RCP<const MV> > Q)
00898   {
00899     using Teuchos::Range1D;
00900     using Teuchos::RCP;
00901     using Teuchos::rcp;
00902 
00903     if (outOfPlace) {
00904       // Make sure that X_out has at least as many columns as X_in.
00905       TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(X_out) < MVT::GetNumberVecs(X_in),
00906        std::invalid_argument, 
00907        "Belos::TsqrOrthoManagerImpl::"
00908        "projectAndNormalizeOutOfPlace(...):"
00909        "X_out has " << MVT::GetNumberVecs(X_out) 
00910        << " columns, but X_in has "
00911        << MVT::GetNumberVecs(X_in) << " columns.");
00912     }
00913     // Fetch dimensions of X_in and Q, and allocate space for first-
00914     // and second-pass projection coefficients (C resp. C2).
00915     int ncols_X, num_Q_blocks, ncols_Q_total;
00916     checkProjectionDims (ncols_X, num_Q_blocks, ncols_Q_total, X_in, Q);
00917 
00918     // Test for quick exit: if any dimension of X is zero.
00919     if (ncols_X == 0) {
00920       return 0;
00921     }
00922     // If there are zero Q blocks or zero Q columns, just normalize!
00923     if (num_Q_blocks == 0 || ncols_Q_total == 0) {
00924       if (outOfPlace) {
00925   return normalizeOutOfPlace (X_in, X_out, B);
00926       } else {
00927   return normalize (X_in, B);
00928       }
00929     }
00930 
00931     // The typical case is that the entries of C have been allocated
00932     // before, so we attempt to recycle the allocations.  The call
00933     // below will reallocate if it cannot recycle.
00934     allocateProjectionCoefficients (C, Q, X_in, true);
00935 
00936     // If we are doing block reorthogonalization, then compute the
00937     // column norms of X before projecting for the first time.  This
00938     // will help us decide whether we need to reorthogonalize X.
00939     std::vector<magnitude_type> normsBeforeFirstPass (ncols_X, SCTM::zero());
00940     if (reorthogonalizeBlocks_) {
00941       MVT::MvNorm (X_in, normsBeforeFirstPass);
00942     }
00943 
00944     // First (Modified) Block Gram-Schmidt pass, in place in X_in.
00945     rawProject (X_in, Q, C);
00946 
00947     // Make space for the normalization coefficients.  This will
00948     // either be a freshly allocated matrix (if B is null), or a view
00949     // of the appropriately sized upper left submatrix of *B (if B is
00950     // not null).
00951     //
00952     // Note that if we let the normalize() routine allocate (in the
00953     // case that B is null), that storage will go away at the end of
00954     // normalize().  (This is because it passes the RCP by value, not
00955     // by reference.)
00956     mat_ptr B_out;
00957     if (B.is_null()) {
00958       B_out = rcp (new mat_type (ncols_X, ncols_X));
00959     } else {
00960       // Make sure that B is no smaller than numCols x numCols.
00961       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < ncols_X || B->numCols() < ncols_X,
00962        std::invalid_argument,
00963        "normalizeOne: Input matrix B must be at "
00964        "least " << ncols_X << " x " << ncols_X 
00965        << ", but is instead " << B->numRows()
00966        << " x " << B->numCols() << ".");
00967       // Create a view of the ncols_X by ncols_X upper left
00968       // submatrix of *B.  TSQR will write the normalization
00969       // coefficients there.
00970       B_out = rcp (new mat_type (Teuchos::View, *B, ncols_X, ncols_X));
00971     }
00972 
00973     // Rank of X(_in) after first projection pass.  If outOfPlace,
00974     // this overwrites X_in with invalid values, and the results go in
00975     // X_out.  Otherwise, it's done in place in X_in.
00976     const int firstPassRank = outOfPlace ? 
00977       normalizeOutOfPlace (X_in, X_out, B_out) : 
00978       normalize (X_in, B_out);
00979     if (B.is_null()) {
00980       // The input matrix B is null, so assign B_out to it.  If B was
00981       // not null on input, then B_out is a view of *B, so we don't
00982       // have to do anything here.  Note that SerialDenseMatrix uses
00983       // raw pointers to store data and represent views, so we have to
00984       // be careful about scope.
00985       B = B_out;
00986     }
00987     int rank = firstPassRank; // Current rank of X.
00988 
00989     // If X was not full rank after projection and randomizeNullSpace_
00990     // is true, then normalize(OutOfPlace)() replaced the null space
00991     // basis of X with random vectors, and orthogonalized them against
00992     // the column space basis of X.  However, we still need to
00993     // orthogonalize the random vectors against the Q[i], after which
00994     // we will need to renormalize them.
00995     //
00996     // If outOfPlace, then we need to work in X_out (where
00997     // normalizeOutOfPlace() wrote the normalized vectors).
00998     // Otherwise, we need to work in X_in.
00999     //
01000     // Note: We don't need to keep the new projection coefficients,
01001     // since they are multiplied by the "small" part of B
01002     // corresponding to the null space of the original X.
01003     if (firstPassRank < ncols_X && randomizeNullSpace_) {
01004       const int numNullSpaceCols = ncols_X - firstPassRank;
01005       const Range1D nullSpaceIndices (firstPassRank, ncols_X - 1);
01006 
01007       // Space for projection coefficients (will be thrown away)
01008       Teuchos::Array<mat_ptr> C_null (num_Q_blocks);
01009       for (int k = 0; k < num_Q_blocks; ++k) {
01010   const int numColsQk = MVT::GetNumberVecs(*Q[k]);
01011   C_null[k] = rcp (new mat_type (numColsQk, numNullSpaceCols));
01012       }
01013       // Space for normalization coefficients (will be thrown away).
01014       RCP<mat_type> B_null (new mat_type (numNullSpaceCols, numNullSpaceCols));
01015 
01016       int randomVectorsRank;
01017       if (outOfPlace) {
01018   // View of the null space basis columns of X.
01019   // normalizeOutOfPlace() wrote them into X_out.
01020   RCP<MV> X_out_null = MVT::CloneViewNonConst (X_out, nullSpaceIndices);
01021   // Use X_in for scratch space.  Copy X_out_null into the
01022   // last few columns of X_in (X_in_null) and do projections
01023   // in there.  (This saves us a copy wen we renormalize
01024   // (out of place) back into X_out.)
01025   RCP<MV> X_in_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01026   MVT::Assign (*X_out_null, *X_in_null);
01027   // Project the new random vectors against the Q blocks, and
01028   // renormalize the result into X_out_null.  
01029   rawProject (*X_in_null, Q, C_null);
01030   randomVectorsRank = normalizeOutOfPlace (*X_in_null, *X_out_null, B_null);
01031       } else {
01032   // View of the null space columns of X.  
01033   // They live in X_in.
01034   RCP<MV> X_null = MVT::CloneViewNonConst (X_in, nullSpaceIndices);
01035   // Project the new random vectors against the Q blocks,
01036   // and renormalize the result (in place).
01037   rawProject (*X_null, Q, C_null);
01038   randomVectorsRank = normalize (*X_null, B_null);
01039       }
01040       // While unusual, it is still possible for the random data not
01041       // to be full rank after projection and normalization.  In that
01042       // case, we could try another set of random data and recurse as
01043       // necessary, but instead for now we just raise an exception.
01044       TEUCHOS_TEST_FOR_EXCEPTION(randomVectorsRank != numNullSpaceCols, 
01045        TsqrOrthoError, 
01046        "Belos::TsqrOrthoManagerImpl::projectAndNormalize"
01047        "OutOfPlace(): After projecting and normalizing the "
01048        "random vectors (used to replace the null space "
01049        "basis vectors from normalizing X), they have rank "
01050        << randomVectorsRank << ", but should have full "
01051        "rank " << numNullSpaceCols << ".");
01052     }
01053 
01054     // Whether or not X_in was full rank after projection, we still
01055     // might want to reorthogonalize against Q.
01056     if (reorthogonalizeBlocks_) {
01057       // We are only interested in the column space basis of X
01058       // resp. X_out.
01059       std::vector<magnitude_type> 
01060   normsAfterFirstPass (firstPassRank, SCTM::zero());
01061       std::vector<magnitude_type> 
01062   normsAfterSecondPass (firstPassRank, SCTM::zero());
01063 
01064       // Compute post-first-pass (pre-normalization) norms.  We could
01065       // have done that using MVT::MvNorm() on X_in after projecting,
01066       // but before the first normalization.  However, that operation
01067       // may be expensive.  It is also unnecessary: after calling
01068       // normalize(), the 2-norm of B(:,j) is the 2-norm of X_in(:,j)
01069       // before normalization, in exact arithmetic.
01070       //
01071       // NOTE (mfh 06 Nov 2010) This is one way that combining
01072       // projection and normalization into a single kernel --
01073       // projectAndNormalize() -- pays off.  In project(), we have to
01074       // compute column norms of X before and after projection.  Here,
01075       // we get them for free from the normalization coefficients.
01076       Teuchos::BLAS<int, Scalar> blas;
01077       for (int j = 0; j < firstPassRank; ++j) {
01078   const Scalar* const B_j = &(*B_out)(0,j);
01079   // Teuchos::BLAS::NRM2 returns a magnitude_type result on
01080   // Scalar inputs.
01081   normsAfterFirstPass[j] = blas.NRM2 (firstPassRank, B_j, 1);
01082       }
01083       // Test whether any of the norms dropped below the
01084       // reorthogonalization threshold.
01085       bool reorthogonalize = false;
01086       for (int j = 0; j < firstPassRank; ++j) {
01087   // If any column's norm decreased too much, mark this block
01088   // for reorthogonalization.  Note that this test will _not_
01089   // activate reorthogonalization if a column's norm before the
01090   // first project-and-normalize step was zero.  It _will_
01091   // activate reorthogonalization if the column's norm before
01092   // was not zero, but is zero now.
01093   const magnitude_type curThreshold = 
01094     blockReorthogThreshold() * normsBeforeFirstPass[j];
01095   if (normsAfterFirstPass[j] < curThreshold) {
01096     reorthogonalize = true; 
01097     break;
01098   }
01099       }
01100       // Perform another Block Gram-Schmidt pass if necessary.  "Twice
01101       // is enough" (Kahan's theorem) for a Krylov method, unless
01102       // (using Stewart's term) there is an "orthogonalization fault"
01103       // (indicated by reorthogFault).
01104       //
01105       // NOTE (mfh 07 Nov 2010) For now, we include the entire block
01106       // of X, including possible random data (that was already
01107       // projected and normalized above).  It might make more sense
01108       // just to process the first firstPassRank columns of X.
01109       // However, the resulting reorthogonalization should still be
01110       // correct regardless.
01111       bool reorthogFault = false;
01112       // Indices of X at which there was an orthogonalization fault.
01113       std::vector<int> faultIndices;
01114       if (reorthogonalize) {
01115   using Teuchos::Copy;
01116   using Teuchos::NO_TRANS;
01117 
01118   // If we're using out-of-place normalization, copy X_out
01119   // (results of first project and normalize pass) back into
01120   // X_in, for the second project and normalize pass.
01121   if (outOfPlace)
01122     MVT::Assign (X_out, X_in);
01123 
01124   // C2 is only used internally, so we know that we are
01125   // allocating fresh and not recycling allocations.  Stating
01126   // this lets us save time checking dimensions.
01127   Teuchos::Array<mat_ptr> C2;
01128   allocateProjectionCoefficients (C2, Q, X_in, false);
01129 
01130   // Block Gram-Schmidt (again).  Delay updating the block
01131   // coefficients until we have the new normalization
01132   // coefficients, which we need in order to do the update.
01133   rawProject (X_in, Q, C2);
01134       
01135   // Coefficients for (re)normalization of X_in.
01136   RCP<mat_type> B2 (new mat_type (ncols_X, ncols_X));
01137 
01138   // Normalize X_in (into X_out, if working out of place).
01139   const int secondPassRank = outOfPlace ? 
01140     normalizeOutOfPlace (X_in, X_out, B2) : 
01141     normalize (X_in, B2);
01142   rank = secondPassRank; // Current rank of X
01143 
01144   // Update normalization coefficients.  We begin with copying
01145   // B_out, since the BLAS' _GEMM routine doesn't let us alias
01146   // its input and output arguments.
01147   mat_type B_copy (Copy, *B_out, B_out->numRows(), B_out->numCols());
01148   // B_out := B2 * B_out (where input B_out is in B_copy).
01149   const int err = B_out->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01150            *B2, B_copy, SCT::zero());
01151   TEUCHOS_TEST_FOR_EXCEPTION(err != 0, std::logic_error, 
01152          "Teuchos::SerialDenseMatrix::multiply "
01153          "returned err = " << err << " != 0");
01154   // Update the block coefficients from the projection step.  We
01155   // use B_copy for this (a copy of B_out, the first-pass
01156   // normalization coefficients).
01157   for (int k = 0; k < num_Q_blocks; ++k) {
01158     mat_type C_k_copy (Copy, *C[k], C[k]->numRows(), C[k]->numCols());
01159 
01160     // C[k] := C2[k]*B_copy + C[k].
01161     const int err2 = C[k]->multiply (NO_TRANS, NO_TRANS, SCT::one(), 
01162             *C2[k], B_copy, SCT::one());
01163     TEUCHOS_TEST_FOR_EXCEPTION(err2 != 0, std::logic_error, 
01164            "Teuchos::SerialDenseMatrix::multiply "
01165            "returned err = " << err << " != 0");
01166   }
01167   // Compute post-second-pass (pre-normalization) norms, using
01168   // B2 (the coefficients from the second normalization step) in
01169   // the same way as with B_out before.
01170   for (int j = 0; j < rank; ++j) {
01171     const Scalar* const B2_j = &(*B2)(0,j);
01172     normsAfterSecondPass[j] = blas.NRM2 (rank, B2_j, 1);
01173   }
01174   // Test whether any of the norms dropped below the
01175   // reorthogonalization threshold.  If so, it's an
01176   // orthogonalization fault, which requires expensive recovery.
01177   reorthogFault = false;
01178   for (int j = 0; j < rank; ++j) {
01179     const magnitude_type relativeLowerBound = 
01180       blockReorthogThreshold() * normsAfterFirstPass[j];
01181     if (normsAfterSecondPass[j] < relativeLowerBound) {
01182       reorthogFault = true; 
01183       faultIndices.push_back (j);
01184     }
01185   }
01186       } // if (reorthogonalize) // reorthogonalization pass
01187 
01188       if (reorthogFault) {
01189   if (throwOnReorthogFault_) {
01190     raiseReorthogFault (normsAfterFirstPass, 
01191             normsAfterSecondPass, 
01192             faultIndices);
01193   } else {
01194     // NOTE (mfh 19 Jan 2011) We could handle the fault here by
01195     // slowly reorthogonalizing, one vector at a time, the
01196     // offending vectors of X.  However, we choose not to
01197     // implement this for now.  If it becomes a problem, let us
01198     // know and we will prioritize implementing this.
01199     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 
01200            "TsqrOrthoManagerImpl has not yet implemented"
01201            " recovery from an orthogonalization fault.");
01202   }
01203       }
01204     } // if (reorthogonalizeBlocks_)
01205     return rank;
01206   }
01207 
01208 
01209   template<class Scalar, class MV>
01210   void
01211   TsqrOrthoManagerImpl<Scalar, MV>::
01212   raiseReorthogFault (const std::vector<magnitude_type>& normsAfterFirstPass,
01213           const std::vector<magnitude_type>& normsAfterSecondPass,
01214           const std::vector<int>& faultIndices)
01215   {
01216     using std::endl;
01217     typedef std::vector<int>::size_type size_type;
01218     std::ostringstream os;
01219       
01220     os << "Orthogonalization fault at the following column(s) of X:" << endl;
01221     os << "Column\tNorm decrease factor" << endl;
01222     for (size_type k = 0; k < faultIndices.size(); ++k)
01223       {
01224   const int index = faultIndices[k];
01225   const magnitude_type decreaseFactor = 
01226     normsAfterSecondPass[index] / normsAfterFirstPass[index];
01227   os << index << "\t" << decreaseFactor << endl;
01228       }
01229     throw TsqrOrthoFault (os.str());
01230   }
01231 
01232   template<class Scalar, class MV>
01233   Teuchos::RCP<const Teuchos::ParameterList>
01234   TsqrOrthoManagerImpl<Scalar, MV>::getValidParameters () const
01235   {
01236     using Teuchos::ParameterList;
01237     using Teuchos::parameterList;
01238     using Teuchos::RCP;
01239 
01240     if (defaultParams_.is_null()) {
01241       RCP<ParameterList> params = parameterList ("TsqrOrthoManagerImpl");
01242       //
01243       // TSQR parameters (set as a sublist).
01244       //
01245       params->set ("TSQR implementation", *(tsqrAdaptor_.getValidParameters()),
01246        "TSQR implementation parameters.");
01247       // 
01248       // Orthogonalization parameters
01249       //
01250       const bool defaultRandomizeNullSpace = true;
01251       params->set ("randomizeNullSpace", defaultRandomizeNullSpace, 
01252        "Whether to fill in null space vectors with random data.");
01253 
01254       const bool defaultReorthogonalizeBlocks = true;
01255       params->set ("reorthogonalizeBlocks", defaultReorthogonalizeBlocks,
01256        "Whether to do block reorthogonalization as necessary.");
01257 
01258       // This parameter corresponds to the "blk_tol_" parameter in
01259       // Belos' DGKSOrthoManager.  We choose the same default value.
01260       const magnitude_type defaultBlockReorthogThreshold = 
01261   magnitude_type(10) * SCTM::squareroot (SCTM::eps());
01262       params->set ("blockReorthogThreshold", defaultBlockReorthogThreshold, 
01263        "If reorthogonalizeBlocks==true, and if the norm of "
01264        "any column within a block decreases by this much or "
01265        "more after orthogonalization, we reorthogonalize.");
01266 
01267       // This parameter corresponds to the "sing_tol_" parameter in
01268       // Belos' DGKSOrthoManager.  We choose the same default value.
01269       const magnitude_type defaultRelativeRankTolerance = 
01270   Teuchos::as<magnitude_type>(10) * SCTM::eps();
01271 
01272       // If the relative rank tolerance is zero, then we will always
01273       // declare blocks to be numerically full rank, as long as no
01274       // singular values are zero.
01275       params->set ("relativeRankTolerance", defaultRelativeRankTolerance,
01276        "Relative tolerance to determine the numerical rank of a "
01277        "block when normalizing.");
01278 
01279       // See Stewart's 2008 paper on block Gram-Schmidt for a definition
01280       // of "orthogonalization fault."
01281       const bool defaultThrowOnReorthogFault = true;
01282       params->set ("throwOnReorthogFault", defaultThrowOnReorthogFault,
01283        "Whether to throw an exception if an orthogonalization "
01284        "fault occurs.  This only matters if reorthogonalization "
01285        "is enabled (reorthogonalizeBlocks==true).");
01286 
01287       const bool defaultForceNonnegativeDiagonal = false;
01288       params->set ("forceNonnegativeDiagonal", defaultForceNonnegativeDiagonal,
01289        "Whether to force the R factor produced by the normalization "
01290        "step to have a nonnegative diagonal.");
01291 
01292       defaultParams_ = params;
01293     }
01294     return defaultParams_;
01295   }
01296 
01297   template<class Scalar, class MV>
01298   Teuchos::RCP<const Teuchos::ParameterList>
01299   TsqrOrthoManagerImpl<Scalar, MV>::getFastParameters ()
01300   {
01301     using Teuchos::ParameterList;
01302     using Teuchos::RCP;
01303     using Teuchos::rcp;
01304 
01305     RCP<const ParameterList> defaultParams = getValidParameters();
01306     // Start with a clone of the default parameters.
01307     RCP<ParameterList> params = rcp (new ParameterList (*defaultParams));
01308   
01309     // Disable reorthogonalization and randomization of the null
01310     // space basis.  Reorthogonalization tolerances don't matter,
01311     // since we aren't reorthogonalizing blocks in the fast
01312     // settings.  We can leave the default values.  Also,
01313     // (re)orthogonalization faults may only occur with
01314     // reorthogonalization, so we don't have to worry about the
01315     // "throwOnReorthogFault" setting.
01316     const bool randomizeNullSpace = false;
01317     params->set ("randomizeNullSpace", randomizeNullSpace);      
01318     const bool reorthogonalizeBlocks = false;
01319     params->set ("reorthogonalizeBlocks", reorthogonalizeBlocks);
01320 
01321     return params;
01322   }
01323 
01324   template<class Scalar, class MV>
01325   int
01326   TsqrOrthoManagerImpl<Scalar, MV>::
01327   rawNormalize (MV& X, 
01328     MV& Q, 
01329     Teuchos::SerialDenseMatrix<int, Scalar>& B)
01330   {
01331     int rank;
01332     try {
01333       // This call only computes the QR factorization X = Q B.
01334       // It doesn't compute the rank of X.  That comes from
01335       // revealRank() below.
01336       tsqrAdaptor_.factorExplicit (X, Q, B, forceNonnegativeDiagonal_);
01337       // This call will only modify *B if *B on input is not of full
01338       // numerical rank.
01339       rank = tsqrAdaptor_.revealRank (Q, B, relativeRankTolerance_);
01340     } catch (std::exception& e) {
01341       throw TsqrOrthoError (e.what()); // Toss the exception up the chain.
01342     }
01343     return rank;
01344   }
01345 
01346   template<class Scalar, class MV>
01347   int
01348   TsqrOrthoManagerImpl<Scalar, MV>::
01349   normalizeOne (MV& X, 
01350     Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B) const
01351   {
01352     // Make space for the normalization coefficient.  This will either
01353     // be a freshly allocated matrix (if B is null), or a view of the
01354     // 1x1 upper left submatrix of *B (if B is not null).
01355     mat_ptr B_out;
01356     if (B.is_null()) {
01357       B_out = rcp (new mat_type (1, 1));
01358     } else {
01359       const int numRows = B->numRows();
01360       const int numCols = B->numCols();
01361       TEUCHOS_TEST_FOR_EXCEPTION(numRows < 1 || numCols < 1, 
01362        std::invalid_argument,
01363        "normalizeOne: Input matrix B must be at "
01364        "least 1 x 1, but is instead " << numRows
01365        << " x " << numCols << ".");
01366       // Create a view of the 1x1 upper left submatrix of *B.
01367       B_out = rcp (new mat_type (Teuchos::View, *B, 1, 1));
01368     }
01369 
01370     // Compute the norm of X, and write the result to B_out.
01371     std::vector<magnitude_type> theNorm (1, SCTM::zero());
01372     MVT::MvNorm (X, theNorm);
01373     (*B_out)(0,0) = theNorm[0];
01374 
01375     if (B.is_null()) {
01376       // The input matrix B is null, so assign B_out to it.  If B was
01377       // not null on input, then B_out is a view of *B, so we don't
01378       // have to do anything here.  Note that SerialDenseMatrix uses
01379       // raw pointers to store data and represent views, so we have to
01380       // be careful about scope.
01381       B = B_out;
01382     }
01383 
01384     // Scale X by its norm, if its norm is zero.  Otherwise, do the
01385     // right thing based on whether the user wants us to fill the null
01386     // space with random vectors.
01387     if (theNorm[0] == SCTM::zero()) {
01388       // Make a view of the first column of Q, fill it with random
01389       // data, and normalize it.  Throw away the resulting norm.
01390       if (randomizeNullSpace_) {
01391   MVT::MvRandom(X);
01392   MVT::MvNorm (X, theNorm);
01393   if (theNorm[0] == SCTM::zero()) {
01394     // It is possible that a random vector could have all zero
01395     // entries, but unlikely.  We could try again, but it's also
01396     // possible that multiple tries could result in zero
01397     // vectors.  We choose instead to give up.
01398     throw TsqrOrthoError("normalizeOne: a supposedly random "
01399              "vector has norm zero!");
01400   } else {
01401     // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a
01402     // Scalar by a magnitude_type is defined and that it results
01403     // in a Scalar.
01404     const Scalar alpha = SCT::one() / theNorm[0];
01405     MVT::MvScale (X, alpha);
01406   }
01407       }
01408       return 0; // The rank of the matrix (actually just one vector) X.
01409     } else {
01410       // NOTE (mfh 09 Nov 2010) I'm assuming that dividing a Scalar by
01411       // a magnitude_type is defined and that it results in a Scalar.
01412       const Scalar alpha = SCT::one() / theNorm[0];
01413       MVT::MvScale (X, alpha);
01414       return 1; // The rank of the matrix (actually just one vector) X.
01415     }
01416   }
01417 
01418 
01419   template<class Scalar, class MV>
01420   void
01421   TsqrOrthoManagerImpl<Scalar, MV>::
01422   rawProject (MV& X, 
01423         Teuchos::ArrayView<Teuchos::RCP<const MV> > Q,
01424         Teuchos::ArrayView<Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > > C) const
01425   {
01426     // "Modified Gram-Schmidt" version of Block Gram-Schmidt.
01427     const int num_Q_blocks = Q.size();
01428     for (int i = 0; i < num_Q_blocks; ++i)
01429       {
01430   // TEUCHOS_TEST_FOR_EXCEPTION(C[i].is_null(), std::logic_error,
01431   //       "TsqrOrthoManagerImpl::rawProject(): C[" 
01432   //       << i << "] is null");
01433   // TEUCHOS_TEST_FOR_EXCEPTION(Q[i].is_null(), std::logic_error,
01434   //       "TsqrOrthoManagerImpl::rawProject(): Q[" 
01435   //       << i << "] is null");
01436   mat_type& Ci = *C[i];
01437   const MV& Qi = *Q[i];
01438   innerProd (Qi, X, Ci);
01439   MVT::MvTimesMatAddMv (-SCT::one(), Qi, Ci, SCT::one(), X);
01440       }
01441   }
01442 
01443 
01444   template<class Scalar, class MV>
01445   void
01446   TsqrOrthoManagerImpl<Scalar, MV>::
01447   rawProject (MV& X, 
01448         const Teuchos::RCP<const MV>& Q, 
01449         const Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> >& C) const
01450   {
01451     // Block Gram-Schmidt
01452     innerProd (*Q, X, *C);
01453     MVT::MvTimesMatAddMv (-SCT::one(), *Q, *C, SCT::one(), X);
01454   }
01455 
01456   template<class Scalar, class MV>
01457   int
01458   TsqrOrthoManagerImpl<Scalar, MV>::
01459   normalizeImpl (MV& X, 
01460      MV& Q, 
01461      Teuchos::RCP<Teuchos::SerialDenseMatrix<int, Scalar> > B, 
01462      const bool outOfPlace)
01463   {
01464     using Teuchos::Range1D;
01465     using Teuchos::RCP;
01466     using Teuchos::rcp;
01467     using Teuchos::ScalarTraits;
01468     using Teuchos::tuple;
01469     using std::cerr;
01470     using std::endl;
01471     // Don't set this to true unless you want lots of debugging
01472     // messages written to stderr on every MPI process.
01473     const bool extraDebug = false;
01474 
01475     const int numCols = MVT::GetNumberVecs (X);
01476     if (numCols == 0) {
01477       return 0; // Fast exit for an empty input matrix.
01478     }
01479 
01480     // We allow Q to have more columns than X.  In that case, we only
01481     // touch the first numCols columns of Q.
01482     TEUCHOS_TEST_FOR_EXCEPTION(MVT::GetNumberVecs(Q) < numCols, 
01483            std::invalid_argument, 
01484            "TsqrOrthoManagerImpl::normalizeImpl(X,Q,B): Q has "
01485            << MVT::GetNumberVecs(Q) << " columns.  This is too "
01486            "few, since X has " << numCols << " columns.");
01487     // TSQR wants a Q with the same number of columns as X, so have it
01488     // work on a nonconstant view of Q with the same number of columns
01489     // as X.
01490     RCP<MV> Q_view = MVT::CloneViewNonConst (Q, Range1D(0, numCols-1));
01491 
01492     // Make space for the normalization coefficients.  This will
01493     // either be a freshly allocated matrix (if B is null), or a view
01494     // of the appropriately sized upper left submatrix of *B (if B is
01495     // not null).
01496     mat_ptr B_out;
01497     if (B.is_null()) {
01498       B_out = rcp (new mat_type (numCols, numCols));
01499     } else {
01500       // Make sure that B is no smaller than numCols x numCols.
01501       TEUCHOS_TEST_FOR_EXCEPTION(B->numRows() < numCols || B->numCols() < numCols,
01502        std::invalid_argument,
01503        "normalizeOne: Input matrix B must be at "
01504        "least " << numCols << " x " << numCols 
01505        << ", but is instead " << B->numRows()
01506        << " x " << B->numCols() << ".");
01507       // Create a view of the numCols x numCols upper left submatrix
01508       // of *B.  TSQR will write the normalization coefficients there.
01509       B_out = rcp (new mat_type (Teuchos::View, *B, numCols, numCols));
01510     }
01511 
01512     if (extraDebug) {
01513       std::vector<magnitude_type> norms (numCols);
01514       MVT::MvNorm (X, norms);
01515       cerr << "Column norms of X before orthogonalization: ";
01516       typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01517       for (iter_type iter = norms.begin(); iter != norms.end(); ++iter) {
01518   cerr << *iter;
01519   if (iter+1 != norms.end())
01520     cerr << ", ";
01521       }
01522     }
01523 
01524     // Compute rank-revealing decomposition (in this case, TSQR of X
01525     // followed by SVD of the R factor and appropriate updating of the
01526     // resulting Q factor) of X.  X is modified in place and filled
01527     // with garbage, and Q_view contains the resulting explicit Q
01528     // factor.  Later, we will copy this back into X.
01529     //
01530     // The matrix *B_out will only be upper triangular if X is of full
01531     // numerical rank.  Otherwise, the entries below the diagonal may
01532     // be filled in as well.
01533     const int rank = rawNormalize (X, *Q_view, *B_out);
01534     if (B.is_null()) {
01535       // The input matrix B is null, so assign B_out to it.  If B was
01536       // not null on input, then B_out is a view of *B, so we don't
01537       // have to do anything here.  Note that SerialDenseMatrix uses
01538       // raw pointers to store data and represent views, so we have to
01539       // be careful about scope.
01540       B = B_out;
01541     }
01542 
01543     if (extraDebug) {
01544       std::vector<magnitude_type> norms (numCols);
01545       MVT::MvNorm (*Q_view, norms);
01546       cerr << "Column norms of Q_view after orthogonalization: ";
01547       for (typename std::vector<magnitude_type>::const_iterator iter = norms.begin(); 
01548      iter != norms.end(); ++iter) {
01549   cerr << *iter;
01550   if (iter+1 != norms.end())
01551     cerr << ", ";
01552       }
01553     }
01554     TEUCHOS_TEST_FOR_EXCEPTION(rank < 0 || rank > numCols, std::logic_error,
01555            "Belos::TsqrOrthoManagerImpl::normalizeImpl: "
01556            "rawNormalize() returned rank = " << rank << " for a "
01557            "matrix X with " << numCols << " columns.  Please report"
01558            " this bug to the Belos developers.");
01559     if (extraDebug && rank == 0) {
01560       // Sanity check: ensure that the columns of X are sufficiently
01561       // small for X to be reported as rank zero.
01562       const mat_type& B_ref = *B;
01563       std::vector<magnitude_type> norms (B_ref.numCols());
01564       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01565   typedef typename mat_type::scalarType mat_scalar_type;
01566   mat_scalar_type sumOfSquares = ScalarTraits<mat_scalar_type>::zero();
01567   for (typename mat_type::ordinalType i = 0; i <= j; ++i) {
01568     const mat_scalar_type B_ij = 
01569       ScalarTraits<mat_scalar_type>::magnitude (B_ref(i,j));
01570     sumOfSquares += B_ij*B_ij;
01571   }
01572   norms[j] = ScalarTraits<mat_scalar_type>::squareroot (sumOfSquares);
01573       }
01574       bool anyNonzero = false;
01575       typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01576       for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01577   if (*it > relativeRankTolerance_) {
01578     anyNonzero = true;
01579   }
01580       }
01581       using std::cerr;
01582       using std::endl;
01583       cerr << "Norms of columns of B after orthogonalization: ";
01584       for (typename mat_type::ordinalType j = 0; j < B_ref.numCols(); ++j) {
01585   cerr << norms[j];
01586   if (j != B_ref.numCols() - 1)
01587     cerr << ", ";
01588       }
01589       cerr << endl;
01590     }
01591 
01592     // If X is full rank or we don't want to replace its null space
01593     // basis with random vectors, then we're done.
01594     if (rank == numCols || ! randomizeNullSpace_) {
01595       // If we're supposed to be working in place in X, copy the
01596       // results back from Q_view into X.
01597       if (! outOfPlace) {
01598   MVT::Assign (*Q_view, X);
01599       }
01600       return rank;
01601     }
01602 
01603     if (randomizeNullSpace_ && rank < numCols) {
01604       // X wasn't full rank.  Replace the null space basis of X (in
01605       // the last numCols-rank columns of Q_view) with random data,
01606       // project it against the first rank columns of Q_view, and
01607       // normalize.
01608       //
01609       // Number of columns to fill with random data.
01610       const int nullSpaceNumCols = numCols - rank;
01611       // Inclusive range of indices of columns of X to fill with
01612       // random data.
01613       Range1D nullSpaceIndices (rank, numCols-1);
01614 
01615       // rawNormalize() wrote the null space basis vectors into
01616       // Q_view.  We continue to work in place in Q_view by writing
01617       // the random data there and (if there is a nontrival column
01618       // space) projecting in place against the column space basis
01619       // vectors (also in Q_view).
01620       RCP<MV> Q_null = MVT::CloneViewNonConst (*Q_view, nullSpaceIndices);
01621       // Replace the null space basis with random data.
01622       MVT::MvRandom (*Q_null); 
01623 
01624       // Make sure that the "random" data isn't all zeros.  This is
01625       // statistically nearly impossible, but we test for debugging
01626       // purposes.
01627       {
01628   std::vector<magnitude_type> norms (MVT::GetNumberVecs(*Q_null));
01629   MVT::MvNorm (*Q_null, norms);
01630 
01631   bool anyZero = false;
01632   typedef typename std::vector<magnitude_type>::const_iterator iter_type;
01633   for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01634     if (*it == SCTM::zero()) {
01635       anyZero = true;
01636     }
01637   }
01638   if (anyZero) {
01639     std::ostringstream os;
01640     os << "TsqrOrthoManagerImpl::normalizeImpl: "
01641       "We are being asked to randomize the null space, for a matrix "
01642       "with " << numCols << " columns and reported column rank "
01643        << rank << ".  The inclusive range of columns to fill with "
01644       "random data is [" << nullSpaceIndices.lbound() << "," 
01645        << nullSpaceIndices.ubound() << "].  After filling the null "
01646       "space vectors with random numbers, at least one of the vectors"
01647       " has norm zero.  Here are the norms of all the null space "
01648       "vectors: [";
01649     for (iter_type it = norms.begin(); it != norms.end(); ++it) {
01650       os << *it;
01651       if (it+1 != norms.end())
01652         os << ", ";
01653     }
01654     os << "].)  There is a tiny probability that this could happen "
01655       "randomly, but it is likely a bug.  Please report it to the "
01656       "Belos developers, especially if you are able to reproduce the "
01657       "behavior.";
01658     TEUCHOS_TEST_FOR_EXCEPTION(anyZero, TsqrOrthoError, os.str());
01659   }
01660       }
01661 
01662       if (rank > 0) {
01663   // Project the random data against the column space basis of
01664   // X, using a simple block projection ("Block Classical
01665   // Gram-Schmidt").  This is accurate because we've already
01666   // orthogonalized the column space basis of X nearly to
01667   // machine precision via a QR factorization (TSQR) with
01668   // accuracy comparable to Householder QR.
01669   RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01670 
01671   // Temporary storage for projection coefficients.  We don't
01672   // need to keep them, since they represent the null space
01673   // basis (for which the coefficients are logically zero).
01674   mat_ptr C_null (new mat_type (rank, nullSpaceNumCols));
01675   rawProject (*Q_null, Q_col, C_null);
01676       }
01677       // Normalize the projected random vectors, so that they are
01678       // mutually orthogonal (as well as orthogonal to the column
01679       // space basis of X).  We use X for the output of the
01680       // normalization: for out-of-place normalization (outOfPlace ==
01681       // true), X is overwritten with "invalid values" anyway, and for
01682       // in-place normalization (outOfPlace == false), we want the
01683       // result to be in X anyway.
01684       RCP<MV> X_null = MVT::CloneViewNonConst (X, nullSpaceIndices);
01685       // Normalization coefficients for projected random vectors.
01686       // Will be thrown away.
01687       mat_type B_null (nullSpaceNumCols, nullSpaceNumCols);
01688       // Write the normalized vectors to X_null (in X).
01689       const int nullSpaceBasisRank = rawNormalize (*Q_null, *X_null, B_null);
01690     
01691       // It's possible, but unlikely, that X_null doesn't have full
01692       // rank (after the projection step).  We could recursively fill
01693       // in more random vectors until we finally get a full rank
01694       // matrix, but instead we just throw an exception.
01695       //
01696       // NOTE (mfh 08 Nov 2010) Perhaps we should deal with this case
01697       // more elegantly.  Recursion might be one way to solve it, but
01698       // be sure to check that the recursion will terminate.  We could
01699       // do this by supplying an additional argument to rawNormalize,
01700       // which is the null space basis rank from the previous
01701       // iteration.  The rank has to decrease each time, or the
01702       // recursion may go on forever.
01703       if (nullSpaceBasisRank < nullSpaceNumCols) {
01704   std::vector<magnitude_type> norms (MVT::GetNumberVecs(*X_null));
01705   MVT::MvNorm (*X_null, norms);
01706   std::ostringstream os;
01707   os << "TsqrOrthoManagerImpl::normalizeImpl: "
01708      << "We are being asked to randomize the null space, "
01709      << "for a matrix with " << numCols << " columns and "
01710      << "column rank " << rank << ".  After projecting and "
01711      << "normalizing the generated random vectors, they "
01712      << "only have rank " << nullSpaceBasisRank << ".  They"
01713      << " should have full rank " << nullSpaceNumCols 
01714      << ".  (The inclusive range of columns to fill with "
01715      << "random data is [" << nullSpaceIndices.lbound() 
01716      << "," << nullSpaceIndices.ubound() << "].  The "
01717      << "column norms of the resulting Q factor are: [";
01718   for (typename std::vector<magnitude_type>::size_type k = 0; 
01719        k < norms.size(); ++k) {
01720     os << norms[k];
01721     if (k != norms.size()-1) {
01722       os << ", ";
01723     }
01724   }
01725   os << "].)  There is a tiny probability that this could "
01726      << "happen randomly, but it is likely a bug.  Please "
01727      << "report it to the Belos developers, especially if "
01728      << "you are able to reproduce the behavior.";
01729 
01730   TEUCHOS_TEST_FOR_EXCEPTION(nullSpaceBasisRank < nullSpaceNumCols, 
01731          TsqrOrthoError, os.str());
01732       }
01733       // If we're normalizing out of place, copy the X_null
01734       // vectors back into Q_null; the Q_col vectors are already
01735       // where they are supposed to be in that case.
01736       //
01737       // If we're normalizing in place, leave X_null alone (it's
01738       // already where it needs to be, in X), but copy Q_col back
01739       // into the first rank columns of X.
01740       if (outOfPlace) {
01741   MVT::Assign (*X_null, *Q_null);
01742       } else if (rank > 0) {
01743   // MVT::Assign() doesn't accept empty ranges of columns.
01744   RCP<const MV> Q_col = MVT::CloneView (*Q_view, Range1D(0, rank-1));
01745   RCP<MV> X_col = MVT::CloneViewNonConst (X, Range1D(0, rank-1));
01746   MVT::Assign (*Q_col, *X_col);
01747       }
01748     }
01749     return rank;
01750   }
01751 
01752 
01753   template<class Scalar, class MV>
01754   void
01755   TsqrOrthoManagerImpl<Scalar, MV>::
01756   checkProjectionDims (int& ncols_X, 
01757            int& num_Q_blocks,
01758            int& ncols_Q_total,
01759            const MV& X, 
01760            Teuchos::ArrayView<Teuchos::RCP<const MV> > Q) const
01761   {
01762     // First assign to temporary values, so the function won't
01763     // commit any externally visible side effects unless it will
01764     // return normally (without throwing an exception).  (I'm being
01765     // cautious; MVT::GetNumberVecs() probably shouldn't have any
01766     // externally visible side effects, unless it is logging to a
01767     // file or something.)
01768     int the_ncols_X, the_num_Q_blocks, the_ncols_Q_total;
01769     the_num_Q_blocks = Q.size();
01770     the_ncols_X = MVT::GetNumberVecs (X);
01771 
01772     // Compute the total number of columns of all the Q[i] blocks.
01773     the_ncols_Q_total = 0;
01774     // You should be angry if your compiler doesn't support type
01775     // inference ("auto").  That's why I need this awful typedef.
01776     using Teuchos::ArrayView;
01777     using Teuchos::RCP;
01778     typedef typename ArrayView<RCP<const MV> >::const_iterator iter_type;
01779     for (iter_type it = Q.begin(); it != Q.end(); ++it) {
01780       const MV& Qi = **it;
01781       the_ncols_Q_total += MVT::GetNumberVecs (Qi);
01782     }
01783 
01784     // Commit temporary values to the output arguments.
01785     ncols_X = the_ncols_X;
01786     num_Q_blocks = the_num_Q_blocks;
01787     ncols_Q_total = the_ncols_Q_total;
01788   }
01789 
01790 } // namespace Anasazi
01791 
01792 #endif // __AnasaziTsqrOrthoManagerImpl_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends