Belos Package Browser (Single Doxygen Collection) Development
BelosProjectedLeastSquaresSolver.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                  Copyright 2004 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __Belos_ProjectedLeastSquaresSolver_hpp
00043 #define __Belos_ProjectedLeastSquaresSolver_hpp
00044 
00045 #include "BelosConfigDefs.hpp"
00046 #include "BelosTypes.hpp"
00047 #include "Teuchos_Array.hpp"
00048 #include "Teuchos_BLAS.hpp"
00049 #include "Teuchos_LAPACK.hpp"
00050 #include "Teuchos_oblackholestream.hpp"
00051 #include "Teuchos_ScalarTraits.hpp"
00052 #include "Teuchos_SerialDenseMatrix.hpp"
00053 #include "Teuchos_StandardParameterEntryValidators.hpp"
00054 
00058 
00059 namespace Belos {
00060 
00069   namespace details {
00070 
00071     // Anonymous namespace restricts contents to file scope.
00072     namespace {
00075       template<class Scalar>
00076       void
00077       printMatrix (std::ostream& out,
00078                    const std::string& name,
00079                    const Teuchos::SerialDenseMatrix<int, Scalar>& A)
00080       {
00081         using std::endl;
00082 
00083         const int numRows = A.numRows();
00084         const int numCols = A.numCols();
00085 
00086         out << name << " = " << endl << '[';
00087         if (numCols == 1) {
00088           // Compact form for column vectors; valid Matlab.
00089           for (int i = 0; i < numRows; ++i) {
00090             out << A(i,0);
00091             if (i < numRows-1) {
00092               out << "; ";
00093             }
00094           }
00095         } else {
00096           for (int i = 0; i < numRows; ++i) {
00097             for (int j = 0; j < numCols; ++j) {
00098               out << A(i,j);
00099               if (j < numCols-1) {
00100                 out << ", ";
00101               } else if (i < numRows-1) {
00102                 out << ';' << endl;
00103               }
00104             }
00105           }
00106         }
00107         out << ']' << endl;
00108       }
00109 
00112       template<class Scalar>
00113       void
00114       print (std::ostream& out,
00115              const Teuchos::SerialDenseMatrix<int, Scalar>& A,
00116              const std::string& linePrefix)
00117       {
00118         using std::endl;
00119 
00120         const int numRows = A.numRows();
00121         const int numCols = A.numCols();
00122 
00123         out << linePrefix << '[';
00124         if (numCols == 1) {
00125           // Compact form for column vectors; valid Matlab.
00126           for (int i = 0; i < numRows; ++i) {
00127             out << A(i,0);
00128             if (i < numRows-1) {
00129               out << "; ";
00130             }
00131           }
00132         } else {
00133           for (int i = 0; i < numRows; ++i) {
00134             for (int j = 0; j < numCols; ++j) {
00135               if (numRows > 1) {
00136                 out << linePrefix << "  ";
00137               }
00138               out << A(i,j);
00139               if (j < numCols-1) {
00140                 out << ", ";
00141               } else if (i < numRows-1) {
00142                 out << ';' << endl;
00143               }
00144             }
00145           }
00146         }
00147         out << linePrefix << ']' << endl;
00148       }
00149     } // namespace (anonymous)
00150 
00158     template<class Scalar>
00159     class ProjectedLeastSquaresProblem {
00160     public:
00163       typedef Scalar scalar_type;
00166       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00167 
00175       Teuchos::SerialDenseMatrix<int,Scalar> H;
00176 
00188       Teuchos::SerialDenseMatrix<int,Scalar> R;
00189 
00200       Teuchos::SerialDenseMatrix<int,Scalar> y;
00201 
00210       Teuchos::SerialDenseMatrix<int,Scalar> z;
00211 
00221       Teuchos::Array<Scalar> theCosines;
00222 
00227       Teuchos::Array<Scalar> theSines;
00228 
00237       ProjectedLeastSquaresProblem (const int maxNumIterations) :
00238         H (maxNumIterations+1, maxNumIterations),
00239         R (maxNumIterations+1, maxNumIterations),
00240         y (maxNumIterations+1, 1),
00241         z (maxNumIterations+1, 1),
00242         theCosines (maxNumIterations+1),
00243         theSines (maxNumIterations+1)
00244       {}
00245 
00264       void
00265       reset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta)
00266       {
00267         typedef Teuchos::ScalarTraits<Scalar> STS;
00268 
00269         // Zero out the right-hand side of the least-squares problem.
00270         z.putScalar (STS::zero());
00271 
00272         // Promote the initial residual norm from a magnitude type to
00273         // a scalar type, so we can assign it to the first entry of z.
00274         const Scalar initialResidualNorm (beta);
00275         z(0,0) = initialResidualNorm;
00276       }
00277 
00291       void
00292       reallocateAndReset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta,
00293                           const int maxNumIterations)
00294       {
00295         typedef Teuchos::ScalarTraits<Scalar> STS;
00296         typedef Teuchos::ScalarTraits<magnitude_type> STM;
00297 
00298         TEUCHOS_TEST_FOR_EXCEPTION(beta < STM::zero(), std::invalid_argument,
00299                            "ProjectedLeastSquaresProblem::reset: initial "
00300                            "residual beta = " << beta << " < 0.");
00301         TEUCHOS_TEST_FOR_EXCEPTION(maxNumIterations <= 0, std::invalid_argument,
00302                            "ProjectedLeastSquaresProblem::reset: maximum number "
00303                            "of iterations " << maxNumIterations << " <= 0.");
00304 
00305         if (H.numRows() < maxNumIterations+1 || H.numCols() < maxNumIterations) {
00306           const int errcode = H.reshape (maxNumIterations+1, maxNumIterations);
00307           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00308                              "Failed to reshape H into a " << (maxNumIterations+1)
00309                              << " x " << maxNumIterations << " matrix.");
00310         }
00311         (void) H.putScalar (STS::zero());
00312 
00313         if (R.numRows() < maxNumIterations+1 || R.numCols() < maxNumIterations) {
00314           const int errcode = R.reshape (maxNumIterations+1, maxNumIterations);
00315           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00316                              "Failed to reshape R into a " << (maxNumIterations+1)
00317                              << " x " << maxNumIterations << " matrix.");
00318         }
00319         (void) R.putScalar (STS::zero());
00320 
00321         if (y.numRows() < maxNumIterations+1 || y.numCols() < 1) {
00322           const int errcode = y.reshape (maxNumIterations+1, 1);
00323           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00324                              "Failed to reshape y into a " << (maxNumIterations+1)
00325                              << " x " << 1 << " matrix.");
00326         }
00327         (void) y.putScalar (STS::zero());
00328 
00329         if (z.numRows() < maxNumIterations+1 || z.numCols() < 1) {
00330           const int errcode = z.reshape (maxNumIterations+1, 1);
00331           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00332                              "Failed to reshape z into a " << (maxNumIterations+1)
00333                              << " x " << 1 << " matrix.");
00334         }
00335         reset (beta);
00336       }
00337 
00338     };
00339 
00340 
00348     template<class Scalar>
00349     class LocalDenseMatrixOps {
00350     public:
00353       typedef Scalar scalar_type;
00356       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00359       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00360 
00361     private:
00362       typedef Teuchos::ScalarTraits<scalar_type> STS;
00363       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00364       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00365       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00366 
00367     public:
00369       void
00370       conjugateTranspose (mat_type& A_star, const mat_type& A) const
00371       {
00372         for (int i = 0; i < A.numRows(); ++i) {
00373           for (int j = 0; j < A.numCols(); ++j) {
00374             A_star(j,i) = STS::conjugate (A(i,j));
00375           }
00376         }
00377       }
00378 
00380       void
00381       conjugateTransposeOfUpperTriangular (mat_type& L, const mat_type& R) const
00382       {
00383         const int N = R.numCols();
00384 
00385         for (int j = 0; j < N; ++j) {
00386           for (int i = 0; i <= j; ++i) {
00387             L(j,i) = STS::conjugate (R(i,j));
00388           }
00389         }
00390       }
00391 
00393       void
00394       zeroOutStrictLowerTriangle (mat_type& A) const
00395       {
00396         const int N = std::min (A.numRows(), A.numCols());
00397 
00398         for (int j = 0; j < N; ++j) {
00399           for (int i = j+1; i < A.numRows(); ++i) {
00400             A(i,j) = STS::zero();
00401           }
00402         }
00403       }
00404 
00413       void
00414       partition (Teuchos::RCP<mat_type>& A_11,
00415                  Teuchos::RCP<mat_type>& A_21,
00416                  Teuchos::RCP<mat_type>& A_12,
00417                  Teuchos::RCP<mat_type>& A_22,
00418                  mat_type& A,
00419                  const int numRows1,
00420                  const int numRows2,
00421                  const int numCols1,
00422                  const int numCols2)
00423       {
00424         using Teuchos::rcp;
00425         using Teuchos::View;
00426 
00427         A_11 = rcp (new mat_type (View, A, numRows1, numCols1, 0, 0));
00428         A_21 = rcp (new mat_type (View, A, numRows2, numCols1, numRows1, 0));
00429         A_12 = rcp (new mat_type (View, A, numRows1, numCols2, 0, numCols1));
00430         A_22 = rcp (new mat_type (View, A, numRows2, numCols2, numRows1, numCols1));
00431       }
00432 
00434       void
00435       matScale (mat_type& A, const scalar_type& alpha) const
00436       {
00437         // const int LDA = A.stride(); // unused
00438         const int numRows = A.numRows();
00439         const int numCols = A.numCols();
00440 
00441         if (numRows == 0 || numCols == 0) {
00442           return;
00443         } else {
00444           for (int j = 0; j < numCols; ++j) {
00445             scalar_type* const A_j = &A(0,j);
00446 
00447             for (int i = 0; i < numRows; ++i) {
00448               A_j[i] *= alpha;
00449             }
00450           }
00451         }
00452       }
00453 
00458       void
00459       axpy (mat_type& Y,
00460             const scalar_type& alpha,
00461             const mat_type& X) const
00462       {
00463         const int numRows = Y.numRows();
00464         const int numCols = Y.numCols();
00465 
00466         TEUCHOS_TEST_FOR_EXCEPTION(numRows != X.numRows() || numCols != X.numCols(),
00467                            std::invalid_argument, "Dimensions of X and Y don't "
00468                            "match.  X is " << X.numRows() << " x " << X.numCols()
00469                            << ", and Y is " << numRows << " x " << numCols << ".");
00470         for (int j = 0; j < numCols; ++j) {
00471           for (int i = 0; i < numRows; ++i) {
00472             Y(i,j) += alpha * X(i,j);
00473           }
00474         }
00475       }
00476 
00478       void
00479       matAdd (mat_type& A, const mat_type& B) const
00480       {
00481         const int numRows = A.numRows();
00482         const int numCols = A.numCols();
00483 
00484         TEUCHOS_TEST_FOR_EXCEPTION(
00485           B.numRows() != numRows || B.numCols() != numCols,
00486           std::invalid_argument,
00487           "matAdd: The input matrices A and B have incompatible dimensions.  "
00488           "A is " << numRows << " x " << numCols << ", but B is " <<
00489           B.numRows () << " x " << B.numCols () << ".");
00490         if (numRows == 0 || numCols == 0) {
00491           return;
00492         } else {
00493           for (int j = 0; j < numCols; ++j) {
00494             scalar_type* const A_j = &A(0,j);
00495             const scalar_type* const B_j = &B(0,j);
00496 
00497             for (int i = 0; i < numRows; ++i) {
00498               A_j[i] += B_j[i];
00499             }
00500           }
00501         }
00502       }
00503 
00505       void
00506       matSub (mat_type& A, const mat_type& B) const
00507       {
00508         const int numRows = A.numRows();
00509         const int numCols = A.numCols();
00510 
00511         TEUCHOS_TEST_FOR_EXCEPTION(
00512           B.numRows() != numRows || B.numCols() != numCols,
00513           std::invalid_argument,
00514           "matSub: The input matrices A and B have incompatible dimensions.  "
00515           "A is " << numRows << " x " << numCols << ", but B is " <<
00516           B.numRows () << " x " << B.numCols () << ".");
00517         if (numRows == 0 || numCols == 0) {
00518           return;
00519         } else {
00520           for (int j = 0; j < numCols; ++j) {
00521             scalar_type* const A_j = &A(0,j);
00522             const scalar_type* const B_j = &B(0,j);
00523 
00524             for (int i = 0; i < numRows; ++i) {
00525               A_j[i] -= B_j[i];
00526             }
00527           }
00528         }
00529       }
00530 
00535       void
00536       rightUpperTriSolve (mat_type& B,
00537                           const mat_type& R) const
00538       {
00539         TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != R.numRows(),
00540                            std::invalid_argument,
00541                            "rightUpperTriSolve: R and B have incompatible "
00542                            "dimensions.  B has " << B.numCols() << " columns, "
00543                            "but R has " << R.numRows() << " rows.");
00544         blas_type blas;
00545         blas.TRSM (Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI,
00546                    Teuchos::NO_TRANS, Teuchos::NON_UNIT_DIAG,
00547                    R.numCols(), B.numCols(),
00548                    STS::one(), R.values(), R.stride(),
00549                    B.values(), B.stride());
00550       }
00551 
00557       void
00558       matMatMult (const scalar_type& beta,
00559                   mat_type& C,
00560                   const scalar_type& alpha,
00561                   const mat_type& A,
00562                   const mat_type& B) const
00563       {
00564         using Teuchos::NO_TRANS;
00565 
00566         TEUCHOS_TEST_FOR_EXCEPTION(A.numCols() != B.numRows(),
00567                            std::invalid_argument,
00568                            "matMatMult: The input matrices A and B have "
00569                            "incompatible dimensions.  A is " << A.numRows()
00570                            << " x " << A.numCols() << ", but B is "
00571                            << B.numRows() << " x " << B.numCols() << ".");
00572         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != C.numRows(),
00573                            std::invalid_argument,
00574                            "matMatMult: The input matrix A and the output "
00575                            "matrix C have incompatible dimensions.  A has "
00576                            << A.numRows() << " rows, but C has " << C.numRows()
00577                            << " rows.");
00578         TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != C.numCols(),
00579                            std::invalid_argument,
00580                            "matMatMult: The input matrix B and the output "
00581                            "matrix C have incompatible dimensions.  B has "
00582                            << B.numCols() << " columns, but C has "
00583                            << C.numCols() << " columns.");
00584         blas_type blas;
00585         blas.GEMM (NO_TRANS, NO_TRANS, C.numRows(), C.numCols(), A.numCols(),
00586                    alpha, A.values(), A.stride(), B.values(), B.stride(),
00587                    beta, C.values(), C.stride());
00588       }
00589 
00597       int
00598       infNaNCount (const mat_type& A, const bool upperTriangular=false) const
00599       {
00600         int count = 0;
00601         for (int j = 0; j < A.numCols(); ++j) {
00602           if (upperTriangular) {
00603             for (int i = 0; i <= j && i < A.numRows(); ++i) {
00604               if (STS::isnaninf (A(i,j))) {
00605                 ++count;
00606               }
00607             }
00608           } else {
00609             for (int i = 0; i < A.numRows(); ++i) {
00610               if (STS::isnaninf (A(i,j))) {
00611                 ++count;
00612               }
00613             }
00614           }
00615         }
00616         return count;
00617       }
00618 
00624       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00625       isUpperTriangular (const mat_type& A) const
00626       {
00627         magnitude_type lowerTri = STM::zero();
00628         magnitude_type upperTri = STM::zero();
00629         int count = 0;
00630 
00631         for (int j = 0; j < A.numCols(); ++j) {
00632           // Compute the Frobenius norm of the upper triangle /
00633           // trapezoid of A.  The second clause of the loop upper
00634           // bound is for matrices with fewer rows than columns.
00635           for (int i = 0; i <= j && i < A.numRows(); ++i) {
00636             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00637             upperTri += A_ij_mag * A_ij_mag;
00638           }
00639           // Scan the strict lower triangle / trapezoid of A.
00640           for (int i = j+1; i < A.numRows(); ++i) {
00641             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00642             lowerTri += A_ij_mag * A_ij_mag;
00643             if (A_ij_mag != STM::zero()) {
00644               ++count;
00645             }
00646           }
00647         }
00648         return std::make_pair (count == 0, std::make_pair (lowerTri, upperTri));
00649       }
00650 
00651 
00657       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00658       isUpperHessenberg (const mat_type& A) const
00659       {
00660         magnitude_type lower = STM::zero();
00661         magnitude_type upper = STM::zero();
00662         int count = 0;
00663 
00664         for (int j = 0; j < A.numCols(); ++j) {
00665           // Compute the Frobenius norm of the upper Hessenberg part
00666           // of A.  The second clause of the loop upper bound is for
00667           // matrices with fewer rows than columns.
00668           for (int i = 0; i <= j+1 && i < A.numRows(); ++i) {
00669             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00670             upper += A_ij_mag * A_ij_mag;
00671           }
00672           // Scan the strict lower part of A.
00673           for (int i = j+2; i < A.numRows(); ++i) {
00674             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00675             lower += A_ij_mag * A_ij_mag;
00676             if (A_ij_mag != STM::zero()) {
00677               ++count;
00678             }
00679           }
00680         }
00681         return std::make_pair (count == 0, std::make_pair (lower, upper));
00682       }
00683 
00690       void
00691       ensureUpperTriangular (const mat_type& A,
00692                              const char* const matrixName) const
00693       {
00694         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00695           isUpperTriangular (A);
00696 
00697         TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00698                            "The " << A.numRows() << " x " << A.numCols()
00699                            << " matrix " << matrixName << " is not upper "
00700                            "triangular.  ||tril(A)||_F = "
00701                            << result.second.first << " and ||A||_F = "
00702                            << result.second.second << ".");
00703       }
00704 
00711       void
00712       ensureUpperHessenberg (const mat_type& A,
00713                              const char* const matrixName) const
00714       {
00715         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00716           isUpperHessenberg (A);
00717 
00718         TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00719                            "The " << A.numRows() << " x " << A.numCols()
00720                            << " matrix " << matrixName << " is not upper "
00721                            "triangular.  ||tril(A(2:end, :))||_F = "
00722                            << result.second.first << " and ||A||_F = "
00723                            << result.second.second << ".");
00724       }
00725 
00741       void
00742       ensureUpperHessenberg (const mat_type& A,
00743                              const char* const matrixName,
00744                              const magnitude_type relativeTolerance) const
00745       {
00746         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00747           isUpperHessenberg (A);
00748 
00749         if (result.first) {
00750           // Mollified relative departure from upper Hessenberg.
00751           const magnitude_type err = (result.second.second == STM::zero() ?
00752                                       result.second.first :
00753                                       result.second.first / result.second.second);
00754           TEUCHOS_TEST_FOR_EXCEPTION(err > relativeTolerance, std::invalid_argument,
00755                              "The " << A.numRows() << " x " << A.numCols()
00756                              << " matrix " << matrixName << " is not upper "
00757                              "triangular.  ||tril(A(2:end, :))||_F "
00758                              << (result.second.second == STM::zero() ? "" : " / ||A||_F")
00759                              << " = " << err << " > " << relativeTolerance << ".");
00760         }
00761       }
00762 
00774       void
00775       ensureMinimumDimensions (const mat_type& A,
00776                                const char* const matrixName,
00777                                const int minNumRows,
00778                                const int minNumCols) const
00779       {
00780         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() < minNumRows || A.numCols() < minNumCols,
00781                            std::invalid_argument,
00782                            "The matrix " << matrixName << " is " << A.numRows()
00783                            << " x " << A.numCols() << ", and therefore does not "
00784                            "satisfy the minimum dimensions " << minNumRows
00785                            << " x " << minNumCols << ".");
00786       }
00787 
00799       void
00800       ensureEqualDimensions (const mat_type& A,
00801                              const char* const matrixName,
00802                              const int numRows,
00803                              const int numCols) const
00804       {
00805         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != numRows || A.numCols() != numCols,
00806                            std::invalid_argument,
00807                            "The matrix " << matrixName << " is supposed to be "
00808                            << numRows << " x " << numCols << ", but is "
00809                            << A.numRows() << " x " << A.numCols() << " instead.");
00810       }
00811 
00812     };
00813 
00832     enum ERobustness {
00833       ROBUSTNESS_NONE,
00834       ROBUSTNESS_SOME,
00835       ROBUSTNESS_LOTS,
00836       ROBUSTNESS_INVALID
00837     };
00838 
00840     inline std::string
00841     robustnessEnumToString (const ERobustness x)
00842     {
00843       const char* strings[] = {"None", "Some", "Lots"};
00844       TEUCHOS_TEST_FOR_EXCEPTION(x < ROBUSTNESS_NONE || x >= ROBUSTNESS_INVALID,
00845                          std::invalid_argument,
00846                          "Invalid enum value " << x << ".");
00847       return std::string (strings[x]);
00848     }
00849 
00851     ERobustness
00852     inline robustnessStringToEnum (const std::string& x)
00853     {
00854       const char* strings[] = {"None", "Some", "Lots"};
00855       for (int r = 0; r < static_cast<int> (ROBUSTNESS_INVALID); ++r) {
00856         if (x == strings[r]) {
00857           return static_cast<ERobustness> (r);
00858         }
00859       }
00860       TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
00861                          "Invalid robustness string " << x << ".");
00862     }
00863 
00874     inline Teuchos::RCP<Teuchos::ParameterEntryValidator>
00875     robustnessValidator ()
00876     {
00877       using Teuchos::stringToIntegralParameterEntryValidator;
00878 
00879       Teuchos::Array<std::string> strs (3);
00880       strs[0] = robustnessEnumToString (ROBUSTNESS_NONE);
00881       strs[1] = robustnessEnumToString (ROBUSTNESS_SOME);
00882       strs[2] = robustnessEnumToString (ROBUSTNESS_LOTS);
00883       Teuchos::Array<std::string> docs (3);
00884       docs[0] = "Use the BLAS' triangular solve.  This may result in Inf or "
00885         "NaN output if the triangular matrix is rank deficient.";
00886       docs[1] = "Robustness somewhere between \"None\" and \"Lots\".";
00887       docs[2] = "Solve the triangular system in a least-squares sense, using "
00888         "an SVD-based algorithm.  This will always succeed, though the "
00889         "solution may not make sense for GMRES.";
00890       Teuchos::Array<ERobustness> ints (3);
00891       ints[0] = ROBUSTNESS_NONE;
00892       ints[1] = ROBUSTNESS_SOME;
00893       ints[2] = ROBUSTNESS_LOTS;
00894       const std::string pname ("Robustness of Projected Least-Squares Solve");
00895 
00896       return stringToIntegralParameterEntryValidator<ERobustness> (strs, docs,
00897                                                                    ints, pname);
00898     }
00899 
00962     template<class Scalar>
00963     class ProjectedLeastSquaresSolver {
00964     public:
00971       typedef Scalar scalar_type;
00976       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00979       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00980 
00981     private:
00982       typedef Teuchos::ScalarTraits<scalar_type> STS;
00983       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00984       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00985       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00986 
00987     public:
01001       ProjectedLeastSquaresSolver (std::ostream& warnStream,
01002                                    const ERobustness defaultRobustness=ROBUSTNESS_NONE) :
01003         warn_ (warnStream),
01004         defaultRobustness_ (defaultRobustness)
01005       {}
01006 
01020       magnitude_type
01021       updateColumn (ProjectedLeastSquaresProblem<Scalar>& problem,
01022                     const int curCol)
01023       {
01024         return updateColumnGivens (problem.H, problem.R, problem.y, problem.z,
01025                                    problem.theCosines, problem.theSines, curCol);
01026       }
01027 
01042       magnitude_type
01043       updateColumns (ProjectedLeastSquaresProblem<Scalar>& problem,
01044                      const int startCol,
01045                      const int endCol)
01046       {
01047         return updateColumnsGivens (problem.H, problem.R, problem.y, problem.z,
01048                                     problem.theCosines, problem.theSines,
01049                                     startCol, endCol);
01050       }
01051 
01065       void
01066       solve (ProjectedLeastSquaresProblem<Scalar>& problem,
01067              const int curCol)
01068       {
01069         solveGivens (problem.y, problem.R, problem.z, curCol);
01070       }
01071 
01076       std::pair<int, bool>
01077       solveUpperTriangularSystem (Teuchos::ESide side,
01078                                   mat_type& X,
01079                                   const mat_type& R,
01080                                   const mat_type& B)
01081       {
01082         return solveUpperTriangularSystem (side, X, R, B, defaultRobustness_);
01083       }
01084 
01123       std::pair<int, bool>
01124       solveUpperTriangularSystem (Teuchos::ESide side,
01125                                   mat_type& X,
01126                                   const mat_type& R,
01127                                   const mat_type& B,
01128                                   const ERobustness robustness)
01129       {
01130         TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() != B.numRows(), std::invalid_argument,
01131                            "The output X and right-hand side B have different "
01132                            "numbers of rows.  X has " << X.numRows() << " rows"
01133                            ", and B has " << B.numRows() << " rows.");
01134         // If B has more columns than X, we ignore the remaining
01135         // columns of B when solving the upper triangular system.  If
01136         // B has _fewer_ columns than X, we can't solve for all the
01137         // columns of X, so we throw an exception.
01138         TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() > B.numCols(), std::invalid_argument,
01139                            "The output X has more columns than the "
01140                            "right-hand side B.  X has " << X.numCols()
01141                            << " columns and B has " << B.numCols()
01142                            << " columns.");
01143         // See above explaining the number of columns in B_view.
01144         mat_type B_view (Teuchos::View, B, B.numRows(), X.numCols());
01145 
01146         // Both the BLAS' _TRSM and LAPACK's _LATRS overwrite the
01147         // right-hand side with the solution, so first copy B_view
01148         // into X.
01149         X.assign (B_view);
01150 
01151         // Solve the upper triangular system.
01152         return solveUpperTriangularSystemInPlace (side, X, R, robustness);
01153       }
01154 
01159       std::pair<int, bool>
01160       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01161                                          mat_type& X,
01162                                          const mat_type& R)
01163       {
01164         return solveUpperTriangularSystemInPlace (side, X, R, defaultRobustness_);
01165       }
01166 
01174       std::pair<int, bool>
01175       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01176                                          mat_type& X,
01177                                          const mat_type& R,
01178                                          const ERobustness robustness)
01179       {
01180         using Teuchos::Array;
01181         using Teuchos::Copy;
01182         using Teuchos::LEFT_SIDE;
01183         using Teuchos::RIGHT_SIDE;
01184         LocalDenseMatrixOps<Scalar> ops;
01185 
01186         const int M = R.numRows();
01187         const int N = R.numCols();
01188         TEUCHOS_TEST_FOR_EXCEPTION(M < N, std::invalid_argument,
01189                            "The input matrix R has fewer columns than rows.  "
01190                            "R is " << M << " x " << N << ".");
01191         // Ignore any additional rows of R by working with a square view.
01192         mat_type R_view (Teuchos::View, R, N, N);
01193 
01194         if (side == LEFT_SIDE) {
01195           TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() < N, std::invalid_argument,
01196                              "The input/output matrix X has only "
01197                              << X.numRows() << " rows, but needs at least "
01198                              << N << " rows to match the matrix for a "
01199                              "left-side solve R \\ X.");
01200         } else if (side == RIGHT_SIDE) {
01201           TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() < N, std::invalid_argument,
01202                              "The input/output matrix X has only "
01203                              << X.numCols() << " columns, but needs at least "
01204                              << N << " columns to match the matrix for a "
01205                              "right-side solve X / R.");
01206         }
01207         TEUCHOS_TEST_FOR_EXCEPTION(robustness < ROBUSTNESS_NONE ||
01208                            robustness >= ROBUSTNESS_INVALID,
01209                            std::invalid_argument,
01210                            "Invalid robustness value " << robustness << ".");
01211 
01212         // In robust mode, scan the matrix and right-hand side(s) for
01213         // Infs and NaNs.  Only look at the upper triangle of the
01214         // matrix.
01215         if (robustness > ROBUSTNESS_NONE) {
01216           int count = ops.infNaNCount (R_view, true);
01217           TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01218                              "There " << (count != 1 ? "are" : "is")
01219                              << " " << count << " Inf or NaN entr"
01220                              << (count != 1 ? "ies" : "y")
01221                              << " in the upper triangle of R.");
01222           count = ops.infNaNCount (X, false);
01223           TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01224                              "There " << (count != 1 ? "are" : "is")
01225                              << " " << count << " Inf or NaN entr"
01226                              << (count != 1 ? "ies" : "y") << " in the "
01227                              "right-hand side(s) X.");
01228         }
01229 
01230         // Pair of values to return from this method.
01231         int rank = N;
01232         bool foundRankDeficiency = false;
01233 
01234         // Solve for X.
01235         blas_type blas;
01236 
01237         if (robustness == ROBUSTNESS_NONE) {
01238           // Fast triangular solve using the BLAS' _TRSM.  This does
01239           // no checking for rank deficiency.
01240           blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01241                     Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(),
01242                     STS::one(), R.values(), R.stride(),
01243                     X.values(), X.stride());
01244         } else if (robustness < ROBUSTNESS_INVALID) {
01245           // Save a copy of X, since X contains the right-hand side on
01246           // input.
01247           mat_type B (Copy, X, X.numRows(), X.numCols());
01248 
01249           // Fast triangular solve using the BLAS' _TRSM.  This does
01250           // no checking for rank deficiency.
01251           blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01252                     Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(),
01253                     STS::one(), R.values(), R.stride(),
01254                     X.values(), X.stride());
01255 
01256           // Check for Infs or NaNs in X.  If there are any, then
01257           // assume that TRSM failed, and use a more robust algorithm.
01258           if (ops.infNaNCount (X, false) != 0) {
01259 
01260             warn_ << "Upper triangular solve: Found Infs and/or NaNs in the "
01261               "solution after using the fast algorithm.  Retrying using a more "
01262               "robust algorithm." << std::endl;
01263 
01264             // Restore X from the copy.
01265             X.assign (B);
01266 
01267             // Find the minimum-norm solution to the least-squares
01268             // problem $\min_x \|RX - B\|_2$, using the singular value
01269             // decomposition (SVD).
01270             LocalDenseMatrixOps<Scalar> ops;
01271             if (side == LEFT_SIDE) {
01272               // _GELSS overwrites its matrix input, so make a copy.
01273               mat_type R_copy (Teuchos::Copy, R_view, N, N);
01274 
01275               // Zero out the lower triangle of R_copy, since the
01276               // mat_type constructor copies all the entries, not just
01277               // the upper triangle.  _GELSS will read all the entries
01278               // of the input matrix.
01279               ops.zeroOutStrictLowerTriangle (R_copy);
01280 
01281               // Solve the least-squares problem.
01282               rank = solveLeastSquaresUsingSVD (R_copy, X);
01283             } else {
01284               // If solving with R on the right-hand side, the interface
01285               // requires that instead of solving $\min \|XR - B\|_2$,
01286               // we have to solve $\min \|R^* X^* - B^*\|_2$.  We
01287               // compute (conjugate) transposes in newly allocated
01288               // temporary matrices X_star resp. R_star.  (B is already
01289               // in X and _GELSS overwrites its input vector X with the
01290               // solution.)
01291               mat_type X_star (X.numCols(), X.numRows());
01292               ops.conjugateTranspose (X_star, X);
01293               mat_type R_star (N, N); // Filled with zeros automatically.
01294               ops.conjugateTransposeOfUpperTriangular (R_star, R);
01295 
01296               // Solve the least-squares problem.
01297               rank = solveLeastSquaresUsingSVD (R_star, X_star);
01298 
01299               // Copy the transpose of X_star back into X.
01300               ops.conjugateTranspose (X, X_star);
01301             }
01302             if (rank < N) {
01303               foundRankDeficiency = true;
01304             }
01305           }
01306         } else {
01307           TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
01308                              "Should never get here!  Invalid robustness value "
01309                              << robustness << ".  Please report this bug to the "
01310                              "Belos developers.");
01311         }
01312         return std::make_pair (rank, foundRankDeficiency);
01313       }
01314 
01315 
01316     public:
01325       bool
01326       testGivensRotations (std::ostream& out)
01327       {
01328         using std::endl;
01329 
01330         out << "Testing Givens rotations:" << endl;
01331         Scalar x = STS::random();
01332         Scalar y = STS::random();
01333         out << "  x = " << x << ", y = " << y << endl;
01334 
01335         Scalar theCosine, theSine, result;
01336         blas_type blas;
01337         computeGivensRotation (x, y, theCosine, theSine, result);
01338         out << "-- After computing rotation:" << endl;
01339         out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01340         out << "---- x = " << x << ", y = " << y
01341             << ", result = " << result << endl;
01342 
01343         blas.ROT (1, &x, 1, &y, 1, &theCosine, &theSine);
01344         out << "-- After applying rotation:" << endl;
01345         out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01346         out << "---- x = " << x << ", y = " << y << endl;
01347 
01348         // Allow only a tiny bit of wiggle room for zeroing-out of y.
01349         if (STS::magnitude(y) > 2*STS::eps())
01350           return false;
01351         else
01352           return true;
01353       }
01354 
01384       bool
01385       testUpdateColumn (std::ostream& out,
01386                         const int numCols,
01387                         const bool testBlockGivens=false,
01388                         const bool extraVerbose=false)
01389       {
01390         using Teuchos::Array;
01391         using std::endl;
01392 
01393         TEUCHOS_TEST_FOR_EXCEPTION(numCols <= 0, std::invalid_argument,
01394                            "numCols = " << numCols << " <= 0.");
01395         const int numRows = numCols + 1;
01396 
01397         mat_type H (numRows, numCols);
01398         mat_type z (numRows, 1);
01399 
01400         mat_type R_givens (numRows, numCols);
01401         mat_type y_givens (numRows, 1);
01402         mat_type z_givens (numRows, 1);
01403         Array<Scalar> theCosines (numCols);
01404         Array<Scalar> theSines (numCols);
01405 
01406         mat_type R_blockGivens (numRows, numCols);
01407         mat_type y_blockGivens (numRows, 1);
01408         mat_type z_blockGivens (numRows, 1);
01409         Array<Scalar> blockCosines (numCols);
01410         Array<Scalar> blockSines (numCols);
01411         const int panelWidth = std::min (3, numCols);
01412 
01413         mat_type R_lapack (numRows, numCols);
01414         mat_type y_lapack (numRows, 1);
01415         mat_type z_lapack (numRows, 1);
01416 
01417         // Make a random least-squares problem.
01418         makeRandomProblem (H, z);
01419         if (extraVerbose) {
01420           printMatrix<Scalar> (out, "H", H);
01421           printMatrix<Scalar> (out, "z", z);
01422         }
01423 
01424         // Set up the right-hand side copies for each of the methods.
01425         // Each method is free to overwrite its given right-hand side.
01426         z_givens.assign (z);
01427         if (testBlockGivens) {
01428           z_blockGivens.assign (z);
01429         }
01430         z_lapack.assign (z);
01431 
01432         //
01433         // Imitate how one would update the least-squares problem in a
01434         // typical GMRES implementation, for each updating method.
01435         //
01436         // Update using Givens rotations, one at a time.
01437         magnitude_type residualNormGivens = STM::zero();
01438         for (int curCol = 0; curCol < numCols; ++curCol) {
01439           residualNormGivens = updateColumnGivens (H, R_givens, y_givens, z_givens,
01440                                                    theCosines, theSines, curCol);
01441         }
01442         solveGivens (y_givens, R_givens, z_givens, numCols-1);
01443 
01444         // Update using the "panel left-looking" Givens approach, with
01445         // the given panel width.
01446         magnitude_type residualNormBlockGivens = STM::zero();
01447         if (testBlockGivens) {
01448           const bool testBlocksAtATime = true;
01449           if (testBlocksAtATime) {
01450             // Blocks of columns at a time.
01451             for (int startCol = 0; startCol < numCols; startCol += panelWidth) {
01452               int endCol = std::min (startCol + panelWidth - 1, numCols - 1);
01453               residualNormBlockGivens =
01454                 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens,
01455                                      blockCosines, blockSines, startCol, endCol);
01456             }
01457           } else {
01458             // One column at a time.  This is good as a sanity check
01459             // to make sure updateColumnsGivens() with a single column
01460             // does the same thing as updateColumnGivens().
01461             for (int startCol = 0; startCol < numCols; ++startCol) {
01462               residualNormBlockGivens =
01463                 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens,
01464                                      blockCosines, blockSines, startCol, startCol);
01465             }
01466           }
01467           // The panel version of Givens should compute the same
01468           // cosines and sines as the non-panel version, and should
01469           // update the right-hand side z in the same way.  Thus, we
01470           // should be able to use the same triangular solver.
01471           solveGivens (y_blockGivens, R_blockGivens, z_blockGivens, numCols-1);
01472         }
01473 
01474         // Solve using LAPACK's least-squares solver.
01475         const magnitude_type residualNormLapack =
01476           solveLapack (H, R_lapack, y_lapack, z_lapack, numCols-1);
01477 
01478         // Compute the condition number of the least-squares problem.
01479         // This requires a residual, so use the residual from the
01480         // LAPACK method.  All that the method needs for an accurate
01481         // residual norm is forward stability.
01482         const magnitude_type leastSquaresCondNum =
01483           leastSquaresConditionNumber (H, z, residualNormLapack);
01484 
01485         // Compute the relative least-squares solution error for both
01486         // Givens methods.  We assume that the LAPACK solution is
01487         // "exact" and compare against the Givens rotations solution.
01488         // This is taking liberties with the definition of condition
01489         // number, but it's the best we can do, since we don't know
01490         // the exact solution and don't have an extended-precision
01491         // solver.
01492 
01493         // The solution lives only in y[0 .. numCols-1].
01494         mat_type y_givens_view (Teuchos::View, y_givens, numCols, 1);
01495         mat_type y_blockGivens_view (Teuchos::View, y_blockGivens, numCols, 1);
01496         mat_type y_lapack_view (Teuchos::View, y_lapack, numCols, 1);
01497 
01498         const magnitude_type givensSolutionError =
01499           solutionError (y_givens_view, y_lapack_view);
01500         const magnitude_type blockGivensSolutionError = testBlockGivens ?
01501           solutionError (y_blockGivens_view, y_lapack_view) :
01502           STM::zero();
01503 
01504         // If printing out the matrices, copy out the upper triangular
01505         // factors for printing.  (Both methods are free to leave data
01506         // below the lower triangle.)
01507         if (extraVerbose) {
01508           mat_type R_factorFromGivens (numCols, numCols);
01509           mat_type R_factorFromBlockGivens (numCols, numCols);
01510           mat_type R_factorFromLapack (numCols, numCols);
01511 
01512           for (int j = 0; j < numCols; ++j) {
01513             for (int i = 0; i <= j; ++i) {
01514               R_factorFromGivens(i,j) = R_givens(i,j);
01515               if (testBlockGivens) {
01516                 R_factorFromBlockGivens(i,j) = R_blockGivens(i,j);
01517               }
01518               R_factorFromLapack(i,j) = R_lapack(i,j);
01519             }
01520           }
01521 
01522           printMatrix<Scalar> (out, "R_givens", R_factorFromGivens);
01523           printMatrix<Scalar> (out, "y_givens", y_givens_view);
01524           printMatrix<Scalar> (out, "z_givens", z_givens);
01525 
01526           if (testBlockGivens) {
01527             printMatrix<Scalar> (out, "R_blockGivens", R_factorFromBlockGivens);
01528             printMatrix<Scalar> (out, "y_blockGivens", y_blockGivens_view);
01529             printMatrix<Scalar> (out, "z_blockGivens", z_blockGivens);
01530           }
01531 
01532           printMatrix<Scalar> (out, "R_lapack", R_factorFromLapack);
01533           printMatrix<Scalar> (out, "y_lapack", y_lapack_view);
01534           printMatrix<Scalar> (out, "z_lapack", z_lapack);
01535         }
01536 
01537         // Compute the (Frobenius) norm of the original matrix H.
01538         const magnitude_type H_norm = H.normFrobenius();
01539 
01540         out << "||H||_F = " << H_norm << endl;
01541 
01542         out << "||H y_givens - z||_2 / ||H||_F = "
01543             << leastSquaresResidualNorm (H, y_givens_view, z) / H_norm << endl;
01544         if (testBlockGivens) {
01545           out << "||H y_blockGivens - z||_2 / ||H||_F = "
01546               << leastSquaresResidualNorm (H, y_blockGivens_view, z) / H_norm << endl;
01547         }
01548         out << "||H y_lapack - z||_2 / ||H||_F = "
01549             << leastSquaresResidualNorm (H, y_lapack_view, z) / H_norm << endl;
01550 
01551         out << "||y_givens - y_lapack||_2 / ||y_lapack||_2 = "
01552             << givensSolutionError << endl;
01553         if (testBlockGivens) {
01554           out << "||y_blockGivens - y_lapack||_2 / ||y_lapack||_2 = "
01555               << blockGivensSolutionError << endl;
01556         }
01557 
01558         out << "Least-squares condition number = "
01559             << leastSquaresCondNum << endl;
01560 
01561         // Now for the controversial part of the test: judging whether
01562         // we succeeded.  This includes the problem's condition
01563         // number, which is a measure of the maximum perturbation in
01564         // the solution for which we can still say that the solution
01565         // is valid.  We include a little wiggle room by including a
01566         // factor proportional to the square root of the number of
01567         // floating-point operations that influence the last entry
01568         // (the conventional Wilkinsonian heuristic), times 10 for
01569         // good measure.
01570         //
01571         // (The square root looks like it has something to do with an
01572         // average-case probabilistic argument, but doesn't really.
01573         // What's an "average problem"?)
01574         const magnitude_type wiggleFactor =
01575           10 * STM::squareroot( numRows*numCols );
01576         const magnitude_type solutionErrorBoundFactor =
01577           wiggleFactor * leastSquaresCondNum;
01578         const magnitude_type solutionErrorBound =
01579           solutionErrorBoundFactor * STS::eps();
01580         out << "Solution error bound: " << solutionErrorBoundFactor
01581             << " * eps = " << solutionErrorBound << endl;
01582 
01583         // Remember that NaN is not greater than, not less than, and
01584         // not equal to any other number, including itself.  Some
01585         // compilers will rudely optimize away the "x != x" test.
01586         if (STM::isnaninf (solutionErrorBound)) {
01587           // Hm, the solution error bound is Inf or NaN.  This
01588           // probably means that the test problem was generated
01589           // incorrectly.  We should return false in this case.
01590           return false;
01591         } else { // solution error bound is finite.
01592           if (STM::isnaninf (givensSolutionError)) {
01593             return false;
01594           } else if (givensSolutionError > solutionErrorBound) {
01595             return false;
01596           } else if (testBlockGivens) {
01597             if (STM::isnaninf (blockGivensSolutionError)) {
01598               return false;
01599             } else if (blockGivensSolutionError > solutionErrorBound) {
01600               return false;
01601             } else { // Givens and Block Givens tests succeeded.
01602               return true;
01603             }
01604           } else { // Not testing block Givens; Givens test succeeded.
01605             return true;
01606           }
01607         }
01608       }
01609 
01625       bool
01626       testTriangularSolves (std::ostream& out,
01627                             const int testProblemSize,
01628                             const ERobustness robustness,
01629                             const bool verbose=false)
01630       {
01631         using Teuchos::LEFT_SIDE;
01632         using Teuchos::RIGHT_SIDE;
01633         using std::endl;
01634         typedef Teuchos::SerialDenseMatrix<int, scalar_type> mat_type;
01635 
01636         Teuchos::oblackholestream blackHole;
01637         std::ostream& verboseOut = verbose ? out : blackHole;
01638 
01639         verboseOut << "Testing upper triangular solves" << endl;
01640         //
01641         // Construct an upper triangular linear system to solve.
01642         //
01643         verboseOut << "-- Generating test matrix" << endl;
01644         const int N = testProblemSize;
01645         mat_type R (N, N);
01646         // Fill the upper triangle of R with random numbers.
01647         for (int j = 0; j < N; ++j) {
01648           for (int i = 0; i <= j; ++i) {
01649             R(i,j) = STS::random ();
01650           }
01651         }
01652         // Compute the Frobenius norm of R for later use.
01653         const magnitude_type R_norm = R.normFrobenius ();
01654         // Fill the right-hand side B with random numbers.
01655         mat_type B (N, 1);
01656         B.random ();
01657         // Compute the Frobenius norm of B for later use.
01658         const magnitude_type B_norm = B.normFrobenius ();
01659 
01660         // Save a copy of the original upper triangular system.
01661         mat_type R_copy (Teuchos::Copy, R, N, N);
01662         mat_type B_copy (Teuchos::Copy, B, N, 1);
01663 
01664         // Solution vector.
01665         mat_type X (N, 1);
01666 
01667         // Solve RX = B.
01668         verboseOut << "-- Solving RX=B" << endl;
01669         // We're ignoring the return values for now.
01670         (void) solveUpperTriangularSystem (LEFT_SIDE, X, R, B, robustness);
01671         // Test the residual error.
01672         mat_type Resid (N, 1);
01673         Resid.assign (B_copy);
01674         Belos::details::LocalDenseMatrixOps<scalar_type> ops;
01675         ops.matMatMult (STS::one(), Resid, -STS::one(), R_copy, X);
01676         verboseOut << "---- ||R*X - B||_F = " << Resid.normFrobenius() << endl;
01677         verboseOut << "---- ||R||_F ||X||_F + ||B||_F = "
01678                    << (R_norm * X.normFrobenius() + B_norm)
01679                    << endl;
01680 
01681         // Restore R and B.
01682         R.assign (R_copy);
01683         B.assign (B_copy);
01684 
01685         //
01686         // Set up a right-side test problem: YR = B^*.
01687         //
01688         mat_type Y (1, N);
01689         mat_type B_star (1, N);
01690         ops.conjugateTranspose (B_star, B);
01691         mat_type B_star_copy (1, N);
01692         B_star_copy.assign (B_star);
01693         // Solve YR = B^*.
01694         verboseOut << "-- Solving YR=B^*" << endl;
01695         // We're ignoring the return values for now.
01696         (void) solveUpperTriangularSystem (RIGHT_SIDE, Y, R, B_star, robustness);
01697         // Test the residual error.
01698         mat_type Resid2 (1, N);
01699         Resid2.assign (B_star_copy);
01700         ops.matMatMult (STS::one(), Resid2, -STS::one(), Y, R_copy);
01701         verboseOut << "---- ||Y*R - B^*||_F = " << Resid2.normFrobenius() << endl;
01702         verboseOut << "---- ||Y||_F ||R||_F + ||B^*||_F = "
01703                    << (Y.normFrobenius() * R_norm + B_norm)
01704                    << endl;
01705 
01706         // FIXME (mfh 14 Oct 2011) The test always "passes" for now;
01707         // you have to inspect the output in order to see whether it
01708         // succeeded.  We really should fix the above to use the
01709         // infinity-norm bounds in Higham's book for triangular
01710         // solves.  That would automate the test.
01711         return true;
01712       }
01713 
01714     private:
01716       std::ostream& warn_;
01717 
01722       ERobustness defaultRobustness_;
01723 
01724     private:
01736       int
01737       solveLeastSquaresUsingSVD (mat_type& A, mat_type& X)
01738       {
01739         using Teuchos::Array;
01740         LocalDenseMatrixOps<Scalar> ops;
01741 
01742         if (defaultRobustness_ > ROBUSTNESS_SOME) {
01743           int count = ops.infNaNCount (A);
01744           TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument,
01745                              "solveLeastSquaresUsingSVD: The input matrix A "
01746                              "contains " << count << "Inf and/or NaN entr"
01747                              << (count != 1 ? "ies" : "y") << ".");
01748           count = ops.infNaNCount (X);
01749           TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument,
01750                              "solveLeastSquaresUsingSVD: The input matrix X "
01751                              "contains " << count << "Inf and/or NaN entr"
01752                              << (count != 1 ? "ies" : "y") << ".");
01753         }
01754         const int N = std::min (A.numRows(), A.numCols());
01755         lapack_type lapack;
01756 
01757         // Rank of A; to be computed by _GELSS and returned.
01758         int rank = N;
01759 
01760         // Use Scalar's machine precision for the rank tolerance,
01761         // not magnitude_type's machine precision.
01762         const magnitude_type rankTolerance = STS::eps();
01763 
01764         // Array of singular values.
01765         Array<magnitude_type> singularValues (N);
01766 
01767         // Extra workspace.  This is only used by _GELSS if Scalar is
01768         // complex.  Teuchos::LAPACK presents a unified interface to
01769         // _GELSS that always includes the RWORK argument, even though
01770         // LAPACK's SGELSS and DGELSS don't have the RWORK argument.
01771         // We always allocate at least one entry so that &rwork[0]
01772         // makes sense.
01773         Array<magnitude_type> rwork (1);
01774         if (STS::isComplex) {
01775           rwork.resize (std::max (1, 5 * N));
01776         }
01777         //
01778         // Workspace query
01779         //
01780         Scalar lworkScalar = STS::one(); // To be set by workspace query
01781         int info = 0;
01782         lapack.GELSS (A.numRows(), A.numCols(), X.numCols(),
01783                       A.values(), A.stride(), X.values(), X.stride(),
01784                       &singularValues[0], rankTolerance, &rank,
01785                       &lworkScalar, -1, &rwork[0], &info);
01786         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01787                            "_GELSS workspace query returned INFO = "
01788                            << info << " != 0.");
01789         const int lwork = static_cast<int> (STS::real (lworkScalar));
01790         TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01791                            "_GELSS workspace query returned LWORK = "
01792                            << lwork << " < 0.");
01793         // Allocate workspace.  Size > 0 means &work[0] makes sense.
01794         Array<Scalar> work (std::max (1, lwork));
01795         // Solve the least-squares problem.
01796         lapack.GELSS (A.numRows(), A.numCols(), X.numCols(),
01797                       A.values(), A.stride(), X.values(), X.stride(),
01798                       &singularValues[0], rankTolerance, &rank,
01799                       &work[0], lwork, &rwork[0], &info);
01800         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::runtime_error,
01801                            "_GELSS returned INFO = " << info << " != 0.");
01802         return rank;
01803       }
01804 
01811       void
01812       solveGivens (mat_type& y,
01813                    mat_type& R,
01814                    const mat_type& z,
01815                    const int curCol)
01816       {
01817         const int numRows = curCol + 2;
01818 
01819         // Now that we have the updated R factor of H, and the updated
01820         // right-hand side z, solve the least-squares problem by
01821         // solving the upper triangular linear system Ry=z for y.
01822         const mat_type R_view (Teuchos::View, R, numRows-1, numRows-1);
01823         const mat_type z_view (Teuchos::View, z, numRows-1, z.numCols());
01824         mat_type y_view (Teuchos::View, y, numRows-1, y.numCols());
01825 
01826         (void) solveUpperTriangularSystem (Teuchos::LEFT_SIDE, y_view,
01827                                            R_view, z_view, defaultRobustness_);
01828       }
01829 
01831       void
01832       makeRandomProblem (mat_type& H, mat_type& z)
01833       {
01834         // In GMRES, z always starts out with only the first entry
01835         // being nonzero.  That entry always has nonnegative real part
01836         // and zero imaginary part, since it is the initial residual
01837         // norm.
01838         H.random ();
01839         // Zero out the entries below the subdiagonal of H, so that it
01840         // is upper Hessenberg.
01841         for (int j = 0; j < H.numCols(); ++j) {
01842           for (int i = j+2; i < H.numRows(); ++i) {
01843             H(i,j) = STS::zero();
01844           }
01845         }
01846         // Initialize z, the right-hand side of the least-squares
01847         // problem.  Make the first entry of z nonzero.
01848         {
01849           // It's still possible that a random number will come up
01850           // zero after 1000 trials, but unlikely.  Nevertheless, it's
01851           // still important not to allow an infinite loop, for
01852           // example if the pseudorandom number generator is broken
01853           // and always returns zero.
01854           const int numTrials = 1000;
01855           magnitude_type z_init = STM::zero();
01856           for (int trial = 0; trial < numTrials && z_init == STM::zero(); ++trial) {
01857             z_init = STM::random();
01858           }
01859           TEUCHOS_TEST_FOR_EXCEPTION(z_init == STM::zero(), std::runtime_error,
01860                              "After " << numTrials << " trial"
01861                              << (numTrials != 1 ? "s" : "")
01862                              << ", we were unable to generate a nonzero pseudo"
01863                              "random real number.  This most likely indicates a "
01864                              "broken pseudorandom number generator.");
01865           const magnitude_type z_first = (z_init < 0) ? -z_init : z_init;
01866 
01867           // NOTE I'm assuming here that "scalar_type = magnitude_type"
01868           // assignments make sense.
01869           z(0,0) = z_first;
01870         }
01871       }
01872 
01876       void
01877       computeGivensRotation (const Scalar& x,
01878                              const Scalar& y,
01879                              Scalar& theCosine,
01880                              Scalar& theSine,
01881                              Scalar& result)
01882       {
01883         // _LARTG, an LAPACK aux routine, is slower but more accurate
01884         // than the BLAS' _ROTG.
01885         const bool useLartg = false;
01886 
01887         if (useLartg) {
01888           lapack_type lapack;
01889           // _LARTG doesn't clobber its input arguments x and y.
01890           lapack.LARTG (x, y, &theCosine, &theSine, &result);
01891         } else {
01892           // _ROTG clobbers its first two arguments.  x is overwritten
01893           // with the result of applying the Givens rotation: [x; y] ->
01894           // [x (on output); 0].  y is overwritten with the "fast"
01895           // Givens transform (see Golub and Van Loan, 3rd ed.).
01896           Scalar x_temp = x;
01897           Scalar y_temp = y;
01898           blas_type blas;
01899           blas.ROTG (&x_temp, &y_temp, &theCosine, &theSine);
01900           result = x_temp;
01901         }
01902       }
01903 
01905       void
01906       singularValues (const mat_type& A,
01907                       Teuchos::ArrayView<magnitude_type> sigmas)
01908       {
01909         using Teuchos::Array;
01910         using Teuchos::ArrayView;
01911 
01912         const int numRows = A.numRows();
01913         const int numCols = A.numCols();
01914         TEUCHOS_TEST_FOR_EXCEPTION(sigmas.size() < std::min (numRows, numCols),
01915                            std::invalid_argument,
01916                            "The sigmas array is only of length " << sigmas.size()
01917                            << ", but must be of length at least "
01918                            << std::min (numRows, numCols)
01919                            << " in order to hold all the singular values of the "
01920                            "matrix A.");
01921 
01922         // Compute the condition number of the matrix A, using a singular
01923         // value decomposition (SVD).  LAPACK's SVD routine overwrites the
01924         // input matrix, so make a copy.
01925         mat_type A_copy (numRows, numCols);
01926         A_copy.assign (A);
01927 
01928         // Workspace query.
01929         lapack_type lapack;
01930         int info = 0;
01931         Scalar lworkScalar = STS::zero();
01932         Array<magnitude_type> rwork (std::max (std::min (numRows, numCols) - 1, 1));
01933         lapack.GESVD ('N', 'N', numRows, numCols,
01934                       A_copy.values(), A_copy.stride(), &sigmas[0],
01935                       (Scalar*) NULL, 1, (Scalar*) NULL, 1,
01936                       &lworkScalar, -1, &rwork[0], &info);
01937 
01938         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01939                            "LAPACK _GESVD workspace query failed with INFO = "
01940                            << info << ".");
01941         const int lwork = static_cast<int> (STS::real (lworkScalar));
01942         TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01943                            "LAPACK _GESVD workspace query returned LWORK = "
01944                            << lwork << " < 0.");
01945         // Make sure that the workspace array always has positive
01946         // length, so that &work[0] makes sense.
01947         Teuchos::Array<Scalar> work (std::max (1, lwork));
01948 
01949         // Compute the singular values of A.
01950         lapack.GESVD ('N', 'N', numRows, numCols,
01951                       A_copy.values(), A_copy.stride(), &sigmas[0],
01952                       (Scalar*) NULL, 1, (Scalar*) NULL, 1,
01953                       &work[0], lwork, &rwork[0], &info);
01954         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01955                            "LAPACK _GESVD failed with INFO = " << info << ".");
01956       }
01957 
01965       std::pair<magnitude_type, magnitude_type>
01966       extremeSingularValues (const mat_type& A)
01967       {
01968         using Teuchos::Array;
01969 
01970         const int numRows = A.numRows();
01971         const int numCols = A.numCols();
01972 
01973         Array<magnitude_type> sigmas (std::min (numRows, numCols));
01974         singularValues (A, sigmas);
01975         return std::make_pair (sigmas[0], sigmas[std::min(numRows, numCols) - 1]);
01976       }
01977 
01988       magnitude_type
01989       leastSquaresConditionNumber (const mat_type& A,
01990                                    const mat_type& b,
01991                                    const magnitude_type& residualNorm)
01992       {
01993         // Extreme singular values of A.
01994         const std::pair<magnitude_type, magnitude_type> sigmaMaxMin =
01995           extremeSingularValues (A);
01996 
01997         // Our solvers currently assume that H has full rank.  If the
01998         // test matrix doesn't have full rank, we stop right away.
01999         TEUCHOS_TEST_FOR_EXCEPTION(sigmaMaxMin.second == STM::zero(), std::runtime_error,
02000                            "The test matrix is rank deficient; LAPACK's _GESVD "
02001                            "routine reports that its smallest singular value is "
02002                            "zero.");
02003         // 2-norm condition number of A.  We checked above that the
02004         // denominator is nonzero.
02005         const magnitude_type A_cond = sigmaMaxMin.first / sigmaMaxMin.second;
02006 
02007         // "Theta" in the variable names below refers to the angle between
02008         // the vectors b and A*x, where x is the computed solution.  It
02009         // measures whether the residual norm is large (near ||b||) or
02010         // small (near 0).
02011         const magnitude_type sinTheta = residualNorm / b.normFrobenius();
02012 
02013         // \sin^2 \theta + \cos^2 \theta = 1.
02014         //
02015         // The range of sine is [-1,1], so squaring it won't overflow.
02016         // We still have to check whether sinTheta > 1, though.  This
02017         // is impossible in exact arithmetic, assuming that the
02018         // least-squares solver worked (b-A*0 = b and x minimizes
02019         // ||b-A*x||_2, so ||b-A*0||_2 >= ||b-A*x||_2).  However, it
02020         // might just be possible in floating-point arithmetic.  We're
02021         // just looking for an estimate, so if sinTheta > 1, we cap it
02022         // at 1.
02023         const magnitude_type cosTheta = (sinTheta > STM::one()) ?
02024           STM::zero() : STM::squareroot (1 - sinTheta * sinTheta);
02025 
02026         // This may result in Inf, if cosTheta is zero.  That's OK; in
02027         // that case, the condition number of the (full-rank)
02028         // least-squares problem is rightfully infinite.
02029         const magnitude_type tanTheta = sinTheta / cosTheta;
02030 
02031         // Condition number for the full-rank least-squares problem.
02032         return 2 * A_cond / cosTheta + tanTheta * A_cond * A_cond;
02033       }
02034 
02036       magnitude_type
02037       leastSquaresResidualNorm (const mat_type& A,
02038                                 const mat_type& x,
02039                                 const mat_type& b)
02040       {
02041         mat_type r (b.numRows(), b.numCols());
02042 
02043         // r := b - A*x
02044         r.assign (b);
02045         LocalDenseMatrixOps<Scalar> ops;
02046         ops.matMatMult (STS::one(), r, -STS::one(), A, x);
02047         return r.normFrobenius ();
02048       }
02049 
02054       magnitude_type
02055       solutionError (const mat_type& x_approx,
02056                      const mat_type& x_exact)
02057       {
02058         const int numRows = x_exact.numRows();
02059         const int numCols = x_exact.numCols();
02060 
02061         mat_type x_diff (numRows, numCols);
02062         for (int j = 0; j < numCols; ++j) {
02063           for (int i = 0; i < numRows; ++i) {
02064             x_diff(i,j) = x_exact(i,j) - x_approx(i,j);
02065           }
02066         }
02067         const magnitude_type scalingFactor = x_exact.normFrobenius();
02068 
02069         // If x_exact has zero norm, just use the absolute difference.
02070         return x_diff.normFrobenius() /
02071           (scalingFactor == STM::zero() ? STM::one() : scalingFactor);
02072       }
02073 
02120       magnitude_type
02121       updateColumnGivens (const mat_type& H,
02122                           mat_type& R,
02123                           mat_type& y,
02124                           mat_type& z,
02125                           Teuchos::ArrayView<scalar_type> theCosines,
02126                           Teuchos::ArrayView<scalar_type> theSines,
02127                           const int curCol)
02128       {
02129         using std::cerr;
02130         using std::endl;
02131 
02132         const int numRows = curCol + 2; // curCol is zero-based
02133         const int LDR = R.stride();
02134         const bool extraDebug = false;
02135 
02136         if (extraDebug) {
02137           cerr << "updateColumnGivens: curCol = " << curCol << endl;
02138         }
02139 
02140         // View of H( 1:curCol+1, curCol ) (in Matlab notation, if
02141         // curCol were a one-based index, as it would be in Matlab but
02142         // is not here).
02143         const mat_type H_col (Teuchos::View, H, numRows, 1, 0, curCol);
02144 
02145         // View of R( 1:curCol+1, curCol ) (again, in Matlab notation,
02146         // if curCol were a one-based index).
02147         mat_type R_col (Teuchos::View, R, numRows, 1, 0, curCol);
02148 
02149         // 1. Copy the current column from H into R, where it will be
02150         //    modified.
02151         R_col.assign (H_col);
02152 
02153         if (extraDebug) {
02154           printMatrix<Scalar> (cerr, "R_col before ", R_col);
02155         }
02156 
02157         // 2. Apply all the previous Givens rotations, if any, to the
02158         //    current column of the matrix.
02159         blas_type blas;
02160         for (int j = 0; j < curCol; ++j) {
02161           // BLAS::ROT really should take "const Scalar*" for these
02162           // arguments, but it wants a "Scalar*" instead, alas.
02163           Scalar theCosine = theCosines[j];
02164           Scalar theSine = theSines[j];
02165 
02166           if (extraDebug) {
02167             cerr << "  j = " << j << ": (cos,sin) = "
02168                  << theCosines[j] << "," << theSines[j] << endl;
02169           }
02170           blas.ROT (1, &R_col(j,0), LDR, &R_col(j+1,0), LDR,
02171                     &theCosine, &theSine);
02172         }
02173         if (extraDebug && curCol > 0) {
02174           printMatrix<Scalar> (cerr, "R_col after applying previous "
02175                                "Givens rotations", R_col);
02176         }
02177 
02178         // 3. Calculate new Givens rotation for R(curCol, curCol),
02179         //    R(curCol+1, curCol).
02180         Scalar theCosine, theSine, result;
02181         computeGivensRotation (R_col(curCol,0), R_col(curCol+1,0),
02182                                theCosine, theSine, result);
02183         theCosines[curCol] = theCosine;
02184         theSines[curCol] = theSine;
02185 
02186         if (extraDebug) {
02187           cerr << "  New cos,sin = " << theCosine << "," << theSine << endl;
02188         }
02189 
02190         // 4. _Apply_ the new Givens rotation.  We don't need to
02191         //    invoke _ROT here, because computeGivensRotation()
02192         //    already gives us the result: [x; y] -> [result; 0].
02193         R_col(curCol, 0) = result;
02194         R_col(curCol+1, 0) = STS::zero();
02195 
02196         if (extraDebug) {
02197           printMatrix<Scalar> (cerr, "R_col after applying current "
02198                                "Givens rotation", R_col);
02199         }
02200 
02201         // 5. Apply the resulting Givens rotation to z (the right-hand
02202         //    side of the projected least-squares problem).
02203         //
02204         // We prefer overgeneralization to undergeneralization by assuming
02205         // here that z may have more than one column.
02206         const int LDZ = z.stride();
02207         blas.ROT (z.numCols(),
02208                   &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02209                   &theCosine, &theSine);
02210 
02211         if (extraDebug) {
02212           //mat_type R_after (Teuchos::View, R, numRows, numRows-1);
02213           //printMatrix<Scalar> (cerr, "R_after", R_after);
02214           //mat_type z_after (Teuchos::View, z, numRows, z.numCols());
02215           printMatrix<Scalar> (cerr, "z_after", z);
02216         }
02217 
02218         // The last entry of z is the nonzero part of the residual of the
02219         // least-squares problem.  Its magnitude gives the residual 2-norm
02220         // of the least-squares problem.
02221         return STS::magnitude( z(numRows-1, 0) );
02222       }
02223 
02257       magnitude_type
02258       solveLapack (const mat_type& H,
02259                    mat_type& R,
02260                    mat_type& y,
02261                    mat_type& z,
02262                    const int curCol)
02263       {
02264         const int numRows = curCol + 2;
02265         const int numCols = curCol + 1;
02266         const int LDR = R.stride();
02267 
02268         // Copy H( 1:curCol+1, 1:curCol ) into R( 1:curCol+1, 1:curCol ).
02269         const mat_type H_view (Teuchos::View, H, numRows, numCols);
02270         mat_type R_view (Teuchos::View, R, numRows, numCols);
02271         R_view.assign (H_view);
02272 
02273         // The LAPACK least-squares solver overwrites the right-hand side
02274         // vector with the solution, so first copy z into y.
02275         mat_type y_view (Teuchos::View, y, numRows, y.numCols());
02276         mat_type z_view (Teuchos::View, z, numRows, y.numCols());
02277         y_view.assign (z_view);
02278 
02279         // Workspace query for the least-squares routine.
02280         int info = 0;
02281         Scalar lworkScalar = STS::zero();
02282         lapack_type lapack;
02283         lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02284                      NULL, LDR, NULL, y_view.stride(),
02285                      &lworkScalar, -1, &info);
02286         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02287                            "LAPACK _GELS workspace query failed with INFO = "
02288                            << info << ", for a " << numRows << " x " << numCols
02289                            << " matrix with " << y_view.numCols()
02290                            << " right hand side"
02291                            << ((y_view.numCols() != 1) ? "s" : "") << ".");
02292         TEUCHOS_TEST_FOR_EXCEPTION(STS::real(lworkScalar) < STM::zero(),
02293                            std::logic_error,
02294                            "LAPACK _GELS workspace query returned an LWORK with "
02295                            "negative real part: LWORK = " << lworkScalar
02296                            << ".  That should never happen.  Please report this "
02297                            "to the Belos developers.");
02298         TEUCHOS_TEST_FOR_EXCEPTION(STS::isComplex && STS::imag(lworkScalar) != STM::zero(),
02299                            std::logic_error,
02300                            "LAPACK _GELS workspace query returned an LWORK with "
02301                            "nonzero imaginary part: LWORK = " << lworkScalar
02302                            << ".  That should never happen.  Please report this "
02303                            "to the Belos developers.");
02304         // Cast workspace from Scalar to int.  Scalar may be complex,
02305         // hence the request for the real part.  Don't ask for the
02306         // magnitude, since computing the magnitude may overflow due
02307         // to squaring and square root to int.  Hopefully LAPACK
02308         // doesn't ever overflow int this way.
02309         const int lwork = std::max (1, static_cast<int> (STS::real (lworkScalar)));
02310 
02311         // Allocate workspace for solving the least-squares problem.
02312         Teuchos::Array<Scalar> work (lwork);
02313 
02314         // Solve the least-squares problem.  The ?: operator prevents
02315         // accessing the first element of the work array, if it has
02316         // length zero.
02317         lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02318                      R_view.values(), R_view.stride(),
02319                      y_view.values(), y_view.stride(),
02320                      (lwork > 0 ? &work[0] : (Scalar*) NULL),
02321                      lwork, &info);
02322 
02323         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02324                            "Solving projected least-squares problem with LAPACK "
02325                            "_GELS failed with INFO = " << info << ", for a "
02326                            << numRows << " x " << numCols << " matrix with "
02327                            << y_view.numCols() << " right hand side"
02328                            << (y_view.numCols() != 1 ? "s" : "") << ".");
02329         // Extract the projected least-squares problem's residual error.
02330         // It's the magnitude of the last entry of y_view on output from
02331         // LAPACK's least-squares solver.
02332         return STS::magnitude( y_view(numRows-1, 0) );
02333       }
02334 
02345       magnitude_type
02346       updateColumnsGivens (const mat_type& H,
02347                            mat_type& R,
02348                            mat_type& y,
02349                            mat_type& z,
02350                            Teuchos::ArrayView<scalar_type> theCosines,
02351                            Teuchos::ArrayView<scalar_type> theSines,
02352                            const int startCol,
02353                            const int endCol)
02354       {
02355         TEUCHOS_TEST_FOR_EXCEPTION(startCol > endCol, std::invalid_argument,
02356                            "updateColumnGivens: startCol = " << startCol
02357                            << " > endCol = " << endCol << ".");
02358         magnitude_type lastResult = STM::zero();
02359         // [startCol, endCol] is an inclusive range.
02360         for (int curCol = startCol; curCol <= endCol; ++curCol) {
02361           lastResult = updateColumnGivens (H, R, y, z, theCosines, theSines, curCol);
02362         }
02363         return lastResult;
02364       }
02365 
02378       magnitude_type
02379       updateColumnsGivensBlock (const mat_type& H,
02380                                 mat_type& R,
02381                                 mat_type& y,
02382                                 mat_type& z,
02383                                 Teuchos::ArrayView<scalar_type> theCosines,
02384                                 Teuchos::ArrayView<scalar_type> theSines,
02385                                 const int startCol,
02386                                 const int endCol)
02387       {
02388         const int numRows = endCol + 2;
02389         const int numColsToUpdate = endCol - startCol + 1;
02390         const int LDR = R.stride();
02391 
02392         // 1. Copy columns [startCol, endCol] from H into R, where they
02393         //    will be modified.
02394         {
02395           const mat_type H_view (Teuchos::View, H, numRows, numColsToUpdate, 0, startCol);
02396           mat_type R_view (Teuchos::View, R, numRows, numColsToUpdate, 0, startCol);
02397           R_view.assign (H_view);
02398         }
02399 
02400         // 2. Apply all the previous Givens rotations, if any, to
02401         //    columns [startCol, endCol] of the matrix.  (Remember
02402         //    that we're using a left-looking QR factorization
02403         //    approach; we haven't yet touched those columns.)
02404         blas_type blas;
02405         for (int j = 0; j < startCol; ++j) {
02406           blas.ROT (numColsToUpdate,
02407                     &R(j, startCol), LDR, &R(j+1, startCol), LDR,
02408                     &theCosines[j], &theSines[j]);
02409         }
02410 
02411         // 3. Update each column in turn of columns [startCol, endCol].
02412         for (int curCol = startCol; curCol < endCol; ++curCol) {
02413           // a. Apply the Givens rotations computed in previous
02414           //    iterations of this loop to the current column of R.
02415           for (int j = startCol; j < curCol; ++j) {
02416             blas.ROT (1, &R(j, curCol), LDR, &R(j+1, curCol), LDR,
02417                       &theCosines[j], &theSines[j]);
02418           }
02419           // b. Calculate new Givens rotation for R(curCol, curCol),
02420           //    R(curCol+1, curCol).
02421           Scalar theCosine, theSine, result;
02422           computeGivensRotation (R(curCol, curCol), R(curCol+1, curCol),
02423                                  theCosine, theSine, result);
02424           theCosines[curCol] = theCosine;
02425           theSines[curCol] = theSine;
02426 
02427           // c. _Apply_ the new Givens rotation.  We don't need to
02428           //    invoke _ROT here, because computeGivensRotation()
02429           //    already gives us the result: [x; y] -> [result; 0].
02430           R(curCol+1, curCol) = result;
02431           R(curCol+1, curCol) = STS::zero();
02432 
02433           // d. Apply the resulting Givens rotation to z (the right-hand
02434           //    side of the projected least-squares problem).
02435           //
02436           // We prefer overgeneralization to undergeneralization by
02437           // assuming here that z may have more than one column.
02438           const int LDZ = z.stride();
02439           blas.ROT (z.numCols(),
02440                     &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02441                     &theCosine, &theSine);
02442         }
02443 
02444         // The last entry of z is the nonzero part of the residual of the
02445         // least-squares problem.  Its magnitude gives the residual 2-norm
02446         // of the least-squares problem.
02447         return STS::magnitude( z(numRows-1, 0) );
02448       }
02449     }; // class ProjectedLeastSquaresSolver
02450   } // namespace details
02451 } // namespace Belos
02452 
02453 #endif // __Belos_ProjectedLeastSquaresSolver_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines