Belos Package Browser (Single Doxygen Collection) Development
BelosProjectedLeastSquaresSolver.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                  Copyright 2004 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __Belos_ProjectedLeastSquaresSolver_hpp
00043 #define __Belos_ProjectedLeastSquaresSolver_hpp
00044 
00045 #include "BelosConfigDefs.hpp"
00046 #include "BelosTypes.hpp"
00047 #include "Teuchos_Array.hpp"
00048 #include "Teuchos_BLAS.hpp"
00049 #include "Teuchos_LAPACK.hpp"
00050 #include "Teuchos_oblackholestream.hpp"
00051 #include "Teuchos_ScalarTraits.hpp"
00052 #include "Teuchos_SerialDenseMatrix.hpp"
00053 #include "Teuchos_StandardParameterEntryValidators.hpp"
00054 
00058 
00059 namespace Belos {
00060 
00069   namespace details {
00070 
00071     // Anonymous namespace restricts contents to file scope.
00072     namespace {
00075       template<class Scalar>
00076       void
00077       printMatrix (std::ostream& out,
00078                    const std::string& name,
00079                    const Teuchos::SerialDenseMatrix<int, Scalar>& A)
00080       {
00081         using std::endl;
00082 
00083         const int numRows = A.numRows();
00084         const int numCols = A.numCols();
00085 
00086         out << name << " = " << endl << '[';
00087         if (numCols == 1) {
00088           // Compact form for column vectors; valid Matlab.
00089           for (int i = 0; i < numRows; ++i) {
00090             out << A(i,0);
00091             if (i < numRows-1) {
00092               out << "; ";
00093             }
00094           }
00095         } else {
00096           for (int i = 0; i < numRows; ++i) {
00097             for (int j = 0; j < numCols; ++j) {
00098               out << A(i,j);
00099               if (j < numCols-1) {
00100                 out << ", ";
00101               } else if (i < numRows-1) {
00102                 out << ';' << endl;
00103               }
00104             }
00105           }
00106         }
00107         out << ']' << endl;
00108       }
00109 
00112       template<class Scalar>
00113       void
00114       print (std::ostream& out,
00115              const Teuchos::SerialDenseMatrix<int, Scalar>& A,
00116              const std::string& linePrefix)
00117       {
00118         using std::endl;
00119 
00120         const int numRows = A.numRows();
00121         const int numCols = A.numCols();
00122 
00123         out << linePrefix << '[';
00124         if (numCols == 1) {
00125           // Compact form for column vectors; valid Matlab.
00126           for (int i = 0; i < numRows; ++i) {
00127             out << A(i,0);
00128             if (i < numRows-1) {
00129               out << "; ";
00130             }
00131           }
00132         } else {
00133           for (int i = 0; i < numRows; ++i) {
00134             for (int j = 0; j < numCols; ++j) {
00135               if (numRows > 1) {
00136                 out << linePrefix << "  ";
00137               }
00138               out << A(i,j);
00139               if (j < numCols-1) {
00140                 out << ", ";
00141               } else if (i < numRows-1) {
00142                 out << ';' << endl;
00143               }
00144             }
00145           }
00146         }
00147         out << linePrefix << ']' << endl;
00148       }
00149     } // namespace (anonymous)
00150 
00157     template<class Scalar>
00158     class ProjectedLeastSquaresProblem {
00159     public:
00162       typedef Scalar scalar_type;
00165       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00166 
00174       Teuchos::SerialDenseMatrix<int,Scalar> H;
00175 
00187       Teuchos::SerialDenseMatrix<int,Scalar> R;
00188 
00199       Teuchos::SerialDenseMatrix<int,Scalar> y;
00200 
00209       Teuchos::SerialDenseMatrix<int,Scalar> z;
00210 
00220       Teuchos::Array<Scalar> theCosines;
00221 
00226       Teuchos::Array<Scalar> theSines;
00227 
00236       ProjectedLeastSquaresProblem (const int maxNumIterations) :
00237         H (maxNumIterations+1, maxNumIterations),
00238         R (maxNumIterations+1, maxNumIterations),
00239         y (maxNumIterations+1, 1),
00240         z (maxNumIterations+1, 1),
00241         theCosines (maxNumIterations+1),
00242         theSines (maxNumIterations+1)
00243       {}
00244 
00263       void
00264       reset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta)
00265       {
00266         typedef Teuchos::ScalarTraits<Scalar> STS;
00267 
00268         // Zero out the right-hand side of the least-squares problem.
00269         z.putScalar (STS::zero());
00270 
00271         // Promote the initial residual norm from a magnitude type to
00272         // a scalar type, so we can assign it to the first entry of z.
00273         const Scalar initialResidualNorm (beta);
00274         z(0,0) = initialResidualNorm;
00275       }
00276 
00290       void
00291       reallocateAndReset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta,
00292                           const int maxNumIterations)
00293       {
00294         typedef Teuchos::ScalarTraits<Scalar> STS;
00295         typedef Teuchos::ScalarTraits<magnitude_type> STM;
00296 
00297         TEUCHOS_TEST_FOR_EXCEPTION(beta < STM::zero(), std::invalid_argument,
00298                            "ProjectedLeastSquaresProblem::reset: initial "
00299                            "residual beta = " << beta << " < 0.");
00300         TEUCHOS_TEST_FOR_EXCEPTION(maxNumIterations <= 0, std::invalid_argument,
00301                            "ProjectedLeastSquaresProblem::reset: maximum number "
00302                            "of iterations " << maxNumIterations << " <= 0.");
00303 
00304         if (H.numRows() < maxNumIterations+1 || H.numCols() < maxNumIterations) {
00305           const int errcode = H.reshape (maxNumIterations+1, maxNumIterations);
00306           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00307                              "Failed to reshape H into a " << (maxNumIterations+1)
00308                              << " x " << maxNumIterations << " matrix.");
00309         }
00310         (void) H.putScalar (STS::zero());
00311 
00312         if (R.numRows() < maxNumIterations+1 || R.numCols() < maxNumIterations) {
00313           const int errcode = R.reshape (maxNumIterations+1, maxNumIterations);
00314           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00315                              "Failed to reshape R into a " << (maxNumIterations+1)
00316                              << " x " << maxNumIterations << " matrix.");
00317         }
00318         (void) R.putScalar (STS::zero());
00319 
00320         if (y.numRows() < maxNumIterations+1 || y.numCols() < 1) {
00321           const int errcode = y.reshape (maxNumIterations+1, 1);
00322           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00323                              "Failed to reshape y into a " << (maxNumIterations+1)
00324                              << " x " << 1 << " matrix.");
00325         }
00326         (void) y.putScalar (STS::zero());
00327 
00328         if (z.numRows() < maxNumIterations+1 || z.numCols() < 1) {
00329           const int errcode = z.reshape (maxNumIterations+1, 1);
00330           TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error,
00331                              "Failed to reshape z into a " << (maxNumIterations+1)
00332                              << " x " << 1 << " matrix.");
00333         }
00334         reset (beta);
00335       }
00336 
00337     };
00338 
00339 
00347     template<class Scalar>
00348     class LocalDenseMatrixOps {
00349     public:
00352       typedef Scalar scalar_type;
00355       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00358       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00359 
00360     private:
00361       typedef Teuchos::ScalarTraits<scalar_type> STS;
00362       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00363       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00364       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00365 
00366     public:
00368       void
00369       conjugateTranspose (mat_type& A_star, const mat_type& A) const
00370       {
00371         for (int i = 0; i < A.numRows(); ++i) {
00372           for (int j = 0; j < A.numCols(); ++j) {
00373             A_star(j,i) = STS::conjugate (A(i,j));
00374           }
00375         }
00376       }
00377 
00379       void
00380       conjugateTransposeOfUpperTriangular (mat_type& L, const mat_type& R) const
00381       {
00382         const int N = R.numCols();
00383 
00384         for (int j = 0; j < N; ++j) {
00385           for (int i = 0; i <= j; ++i) {
00386             L(j,i) = STS::conjugate (R(i,j));
00387           }
00388         }
00389       }
00390 
00392       void
00393       zeroOutStrictLowerTriangle (mat_type& A) const
00394       {
00395         const int N = std::min (A.numRows(), A.numCols());
00396 
00397         for (int j = 0; j < N; ++j) {
00398           for (int i = j+1; i < A.numRows(); ++i) {
00399             A(i,j) = STS::zero();
00400           }
00401         }
00402       }
00403 
00412       void
00413       partition (Teuchos::RCP<mat_type>& A_11,
00414                  Teuchos::RCP<mat_type>& A_21,
00415                  Teuchos::RCP<mat_type>& A_12,
00416                  Teuchos::RCP<mat_type>& A_22,
00417                  mat_type& A,
00418                  const int numRows1,
00419                  const int numRows2,
00420                  const int numCols1,
00421                  const int numCols2)
00422       {
00423         using Teuchos::rcp;
00424         using Teuchos::View;
00425 
00426         A_11 = rcp (new mat_type (View, A, numRows1, numCols1, 0, 0));
00427         A_21 = rcp (new mat_type (View, A, numRows2, numCols1, numRows1, 0));
00428         A_12 = rcp (new mat_type (View, A, numRows1, numCols2, 0, numCols1));
00429         A_22 = rcp (new mat_type (View, A, numRows2, numCols2, numRows1, numCols1));
00430       }
00431 
00433       void
00434       matScale (mat_type& A, const scalar_type& alpha) const
00435       {
00436         const int LDA = A.stride();
00437         const int numRows = A.numRows();
00438         const int numCols = A.numCols();
00439 
00440         if (numRows == 0 || numCols == 0) {
00441           return;
00442         } else {
00443           for (int j = 0; j < numCols; ++j) {
00444             scalar_type* const A_j = &A(0,j);
00445 
00446             for (int i = 0; i < numRows; ++i) {
00447               A_j[i] *= alpha;
00448             }
00449           }
00450         }
00451       }
00452 
00457       void
00458       axpy (mat_type& Y,
00459             const scalar_type& alpha,
00460             const mat_type& X) const
00461       {
00462         const int numRows = Y.numRows();
00463         const int numCols = Y.numCols();
00464 
00465         TEUCHOS_TEST_FOR_EXCEPTION(numRows != X.numRows() || numCols != X.numCols(),
00466                            std::invalid_argument, "Dimensions of X and Y don't "
00467                            "match.  X is " << X.numRows() << " x " << X.numCols()
00468                            << ", and Y is " << numRows << " x " << numCols << ".");
00469         for (int j = 0; j < numCols; ++j) {
00470           for (int i = 0; i < numRows; ++i) {
00471             Y(i,j) += alpha * X(i,j);
00472           }
00473         }
00474       }
00475 
00477       void
00478       matAdd (mat_type& A, const mat_type& B) const
00479       {
00480         const int LDA = A.stride();
00481         const int LDB = B.stride();
00482         const int numRows = A.numRows();
00483         const int numCols = A.numCols();
00484 
00485         TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols,
00486                            std::invalid_argument,
00487                            "matAdd: The input matrices A and B have "
00488                            "incompatible dimensions.  A is " << numRows
00489                            << " x " << numCols << ", but B is " << B.numRows()
00490                            << " x " << B.numCols() << ".");
00491         if (numRows == 0 || numCols == 0) {
00492           return;
00493         } else {
00494           for (int j = 0; j < numCols; ++j) {
00495             scalar_type* const A_j = &A(0,j);
00496             const scalar_type* const B_j = &B(0,j);
00497 
00498             for (int i = 0; i < numRows; ++i) {
00499               A_j[i] += B_j[i];
00500             }
00501           }
00502         }
00503       }
00504 
00506       void
00507       matSub (mat_type& A, const mat_type& B) const
00508       {
00509         const int LDA = A.stride();
00510         const int LDB = B.stride();
00511         const int numRows = A.numRows();
00512         const int numCols = A.numCols();
00513 
00514         TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols,
00515                            std::invalid_argument,
00516                            "matSub: The input matrices A and B have "
00517                            "incompatible dimensions.  A is " << numRows
00518                            << " x " << numCols << ", but B is " << B.numRows()
00519                            << " x " << B.numCols() << ".");
00520         if (numRows == 0 || numCols == 0) {
00521           return;
00522         } else {
00523           for (int j = 0; j < numCols; ++j) {
00524             scalar_type* const A_j = &A(0,j);
00525             const scalar_type* const B_j = &B(0,j);
00526 
00527             for (int i = 0; i < numRows; ++i) {
00528               A_j[i] -= B_j[i];
00529             }
00530           }
00531         }
00532       }
00533 
00538       void
00539       rightUpperTriSolve (mat_type& B,
00540                           const mat_type& R) const
00541       {
00542         TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != R.numRows(),
00543                            std::invalid_argument,
00544                            "rightUpperTriSolve: R and B have incompatible "
00545                            "dimensions.  B has " << B.numCols() << " columns, "
00546                            "but R has " << R.numRows() << " rows.");
00547         blas_type blas;
00548         blas.TRSM (Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI,
00549                    Teuchos::NO_TRANS, Teuchos::NON_UNIT_DIAG,
00550                    R.numCols(), B.numCols(),
00551                    STS::one(), R.values(), R.stride(),
00552                    B.values(), B.stride());
00553       }
00554 
00560       void
00561       matMatMult (const scalar_type& beta,
00562                   mat_type& C,
00563                   const scalar_type& alpha,
00564                   const mat_type& A,
00565                   const mat_type& B) const
00566       {
00567         using Teuchos::NO_TRANS;
00568 
00569         TEUCHOS_TEST_FOR_EXCEPTION(A.numCols() != B.numRows(),
00570                            std::invalid_argument,
00571                            "matMatMult: The input matrices A and B have "
00572                            "incompatible dimensions.  A is " << A.numRows()
00573                            << " x " << A.numCols() << ", but B is "
00574                            << B.numRows() << " x " << B.numCols() << ".");
00575         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != C.numRows(),
00576                            std::invalid_argument,
00577                            "matMatMult: The input matrix A and the output "
00578                            "matrix C have incompatible dimensions.  A has "
00579                            << A.numRows() << " rows, but C has " << C.numRows()
00580                            << " rows.");
00581         TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != C.numCols(),
00582                            std::invalid_argument,
00583                            "matMatMult: The input matrix B and the output "
00584                            "matrix C have incompatible dimensions.  B has "
00585                            << B.numCols() << " columns, but C has "
00586                            << C.numCols() << " columns.");
00587         blas_type blas;
00588         blas.GEMM (NO_TRANS, NO_TRANS, C.numRows(), C.numCols(), A.numCols(),
00589                    alpha, A.values(), A.stride(), B.values(), B.stride(),
00590                    beta, C.values(), C.stride());
00591       }
00592 
00600       int
00601       infNaNCount (const mat_type& A, const bool upperTriangular=false) const
00602       {
00603         int count = 0;
00604         for (int j = 0; j < A.numCols(); ++j) {
00605           if (upperTriangular) {
00606             for (int i = 0; i <= j && i < A.numRows(); ++i) {
00607               if (STS::isnaninf (A(i,j))) {
00608                 ++count;
00609               }
00610             }
00611           } else {
00612             for (int i = 0; i < A.numRows(); ++i) {
00613               if (STS::isnaninf (A(i,j))) {
00614                 ++count;
00615               }
00616             }
00617           }
00618         }
00619         return count;
00620       }
00621 
00627       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00628       isUpperTriangular (const mat_type& A) const
00629       {
00630         magnitude_type lowerTri = STM::zero();
00631         magnitude_type upperTri = STM::zero();
00632         int count = 0;
00633 
00634         for (int j = 0; j < A.numCols(); ++j) {
00635           // Compute the Frobenius norm of the upper triangle /
00636           // trapezoid of A.  The second clause of the loop upper
00637           // bound is for matrices with fewer rows than columns.
00638           for (int i = 0; i <= j && i < A.numRows(); ++i) {
00639             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00640             upperTri += A_ij_mag * A_ij_mag;
00641           }
00642           // Scan the strict lower triangle / trapezoid of A.
00643           for (int i = j+1; i < A.numRows(); ++i) {
00644             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00645             lowerTri += A_ij_mag * A_ij_mag;
00646             if (A_ij_mag != STM::zero()) {
00647               ++count;
00648             }
00649           }
00650         }
00651         return std::make_pair (count == 0, std::make_pair (lowerTri, upperTri));
00652       }
00653 
00654 
00660       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00661       isUpperHessenberg (const mat_type& A) const
00662       {
00663         magnitude_type lower = STM::zero();
00664         magnitude_type upper = STM::zero();
00665         int count = 0;
00666 
00667         for (int j = 0; j < A.numCols(); ++j) {
00668           // Compute the Frobenius norm of the upper Hessenberg part
00669           // of A.  The second clause of the loop upper bound is for
00670           // matrices with fewer rows than columns.
00671           for (int i = 0; i <= j+1 && i < A.numRows(); ++i) {
00672             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00673             upper += A_ij_mag * A_ij_mag;
00674           }
00675           // Scan the strict lower part of A.
00676           for (int i = j+2; i < A.numRows(); ++i) {
00677             const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00678             lower += A_ij_mag * A_ij_mag;
00679             if (A_ij_mag != STM::zero()) {
00680               ++count;
00681             }
00682           }
00683         }
00684         return std::make_pair (count == 0, std::make_pair (lower, upper));
00685       }
00686 
00693       void
00694       ensureUpperTriangular (const mat_type& A,
00695                              const char* const matrixName) const
00696       {
00697         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00698           isUpperTriangular (A);
00699 
00700         TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00701                            "The " << A.numRows() << " x " << A.numCols()
00702                            << " matrix " << matrixName << " is not upper "
00703                            "triangular.  ||tril(A)||_F = "
00704                            << result.second.first << " and ||A||_F = "
00705                            << result.second.second << ".");
00706       }
00707 
00714       void
00715       ensureUpperHessenberg (const mat_type& A,
00716                              const char* const matrixName) const
00717       {
00718         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00719           isUpperHessenberg (A);
00720 
00721         TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00722                            "The " << A.numRows() << " x " << A.numCols()
00723                            << " matrix " << matrixName << " is not upper "
00724                            "triangular.  ||tril(A(2:end, :))||_F = "
00725                            << result.second.first << " and ||A||_F = "
00726                            << result.second.second << ".");
00727       }
00728 
00744       void
00745       ensureUpperHessenberg (const mat_type& A,
00746                              const char* const matrixName,
00747                              const magnitude_type relativeTolerance) const
00748       {
00749         std::pair<bool, std::pair<magnitude_type, magnitude_type> > result =
00750           isUpperHessenberg (A);
00751 
00752         if (result.first) {
00753           // Mollified relative departure from upper Hessenberg.
00754           const magnitude_type err = (result.second.second == STM::zero() ?
00755                                       result.second.first :
00756                                       result.second.first / result.second.second);
00757           TEUCHOS_TEST_FOR_EXCEPTION(err > relativeTolerance, std::invalid_argument,
00758                              "The " << A.numRows() << " x " << A.numCols()
00759                              << " matrix " << matrixName << " is not upper "
00760                              "triangular.  ||tril(A(2:end, :))||_F "
00761                              << (result.second.second == STM::zero() ? "" : " / ||A||_F")
00762                              << " = " << err << " > " << relativeTolerance << ".");
00763         }
00764       }
00765 
00777       void
00778       ensureMinimumDimensions (const mat_type& A,
00779                                const char* const matrixName,
00780                                const int minNumRows,
00781                                const int minNumCols) const
00782       {
00783         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() < minNumRows || A.numCols() < minNumCols,
00784                            std::invalid_argument,
00785                            "The matrix " << matrixName << " is " << A.numRows()
00786                            << " x " << A.numCols() << ", and therefore does not "
00787                            "satisfy the minimum dimensions " << minNumRows
00788                            << " x " << minNumCols << ".");
00789       }
00790 
00802       void
00803       ensureEqualDimensions (const mat_type& A,
00804                              const char* const matrixName,
00805                              const int numRows,
00806                              const int numCols) const
00807       {
00808         TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != numRows || A.numCols() != numCols,
00809                            std::invalid_argument,
00810                            "The matrix " << matrixName << " is supposed to be "
00811                            << numRows << " x " << numCols << ", but is "
00812                            << A.numRows() << " x " << A.numCols() << " instead.");
00813       }
00814 
00815     };
00816 
00835     enum ERobustness {
00836       ROBUSTNESS_NONE,
00837       ROBUSTNESS_SOME,
00838       ROBUSTNESS_LOTS,
00839       ROBUSTNESS_INVALID
00840     };
00841 
00843     std::string
00844     robustnessEnumToString (const ERobustness x)
00845     {
00846       const char* strings[] = {"None", "Some", "Lots"};
00847       TEUCHOS_TEST_FOR_EXCEPTION(x < ROBUSTNESS_NONE || x >= ROBUSTNESS_INVALID,
00848                          std::invalid_argument,
00849                          "Invalid enum value " << x << ".");
00850       return std::string (strings[x]);
00851     }
00852 
00854     ERobustness
00855     robustnessStringToEnum (const std::string& x)
00856     {
00857       const char* strings[] = {"None", "Some", "Lots"};
00858       for (int r = 0; r < static_cast<int> (ROBUSTNESS_INVALID); ++r) {
00859         if (x == strings[r]) {
00860           return static_cast<ERobustness> (r);
00861         }
00862       }
00863       TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
00864                          "Invalid robustness string " << x << ".");
00865     }
00866 
00877     Teuchos::RCP<Teuchos::ParameterEntryValidator>
00878     robustnessValidator ()
00879     {
00880       using Teuchos::stringToIntegralParameterEntryValidator;
00881 
00882       Teuchos::Array<std::string> strs (3);
00883       strs[0] = robustnessEnumToString (ROBUSTNESS_NONE);
00884       strs[1] = robustnessEnumToString (ROBUSTNESS_SOME);
00885       strs[2] = robustnessEnumToString (ROBUSTNESS_LOTS);
00886       Teuchos::Array<std::string> docs (3);
00887       docs[0] = "Use the BLAS' triangular solve.  This may result in Inf or "
00888         "NaN output if the triangular matrix is rank deficient.";
00889       docs[1] = "Robustness somewhere between \"None\" and \"Lots\".";
00890       docs[2] = "Solve the triangular system in a least-squares sense, using "
00891         "an SVD-based algorithm.  This will always succeed, though the "
00892         "solution may not make sense for GMRES.";
00893       Teuchos::Array<ERobustness> ints (3);
00894       ints[0] = ROBUSTNESS_NONE;
00895       ints[1] = ROBUSTNESS_SOME;
00896       ints[2] = ROBUSTNESS_LOTS;
00897       const std::string pname ("Robustness of Projected Least-Squares Solve");
00898 
00899       return stringToIntegralParameterEntryValidator<ERobustness> (strs, docs,
00900                                                                    ints, pname);
00901     }
00902 
00965     template<class Scalar>
00966     class ProjectedLeastSquaresSolver {
00967     public:
00974       typedef Scalar scalar_type;
00979       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00982       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00983 
00984     private:
00985       typedef Teuchos::ScalarTraits<scalar_type> STS;
00986       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00987       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00988       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00989 
00990     public:
01004       ProjectedLeastSquaresSolver (std::ostream& warnStream,
01005                                    const ERobustness defaultRobustness=ROBUSTNESS_NONE) :
01006         warn_ (warnStream),
01007         defaultRobustness_ (defaultRobustness)
01008       {}
01009 
01023       magnitude_type
01024       updateColumn (ProjectedLeastSquaresProblem<Scalar>& problem,
01025                     const int curCol)
01026       {
01027         return updateColumnGivens (problem.H, problem.R, problem.y, problem.z,
01028                                    problem.theCosines, problem.theSines, curCol);
01029       }
01030 
01045       magnitude_type
01046       updateColumns (ProjectedLeastSquaresProblem<Scalar>& problem,
01047                      const int startCol,
01048                      const int endCol)
01049       {
01050         return updateColumnsGivens (problem.H, problem.R, problem.y, problem.z,
01051                                     problem.theCosines, problem.theSines,
01052                                     startCol, endCol);
01053       }
01054 
01067       void
01068       solve (ProjectedLeastSquaresProblem<Scalar>& problem,
01069              const int curCol)
01070       {
01071         solveGivens (problem.y, problem.R, problem.z, curCol);
01072       }
01073 
01078       std::pair<int, bool>
01079       solveUpperTriangularSystem (Teuchos::ESide side,
01080                                   mat_type& X,
01081                                   const mat_type& R,
01082                                   const mat_type& B)
01083       {
01084         return solveUpperTriangularSystem (side, X, R, B, defaultRobustness_);
01085       }
01086 
01125       std::pair<int, bool>
01126       solveUpperTriangularSystem (Teuchos::ESide side,
01127                                   mat_type& X,
01128                                   const mat_type& R,
01129                                   const mat_type& B,
01130                                   const ERobustness robustness)
01131       {
01132         TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() != B.numRows(), std::invalid_argument,
01133                            "The output X and right-hand side B have different "
01134                            "numbers of rows.  X has " << X.numRows() << " rows"
01135                            ", and B has " << B.numRows() << " rows.");
01136         // If B has more columns than X, we ignore the remaining
01137         // columns of B when solving the upper triangular system.  If
01138         // B has _fewer_ columns than X, we can't solve for all the
01139         // columns of X, so we throw an exception.
01140         TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() > B.numCols(), std::invalid_argument,
01141                            "The output X has more columns than the "
01142                            "right-hand side B.  X has " << X.numCols()
01143                            << " columns and B has " << B.numCols()
01144                            << " columns.");
01145         // See above explaining the number of columns in B_view.
01146         mat_type B_view (Teuchos::View, B, B.numRows(), X.numCols());
01147 
01148         // Both the BLAS' _TRSM and LAPACK's _LATRS overwrite the
01149         // right-hand side with the solution, so first copy B_view
01150         // into X.
01151         X.assign (B_view);
01152 
01153         // Solve the upper triangular system.
01154         return solveUpperTriangularSystemInPlace (side, X, R, robustness);
01155       }
01156 
01161       std::pair<int, bool>
01162       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01163                                          mat_type& X,
01164                                          const mat_type& R)
01165       {
01166         return solveUpperTriangularSystemInPlace (side, X, R, defaultRobustness_);
01167       }
01168 
01176       std::pair<int, bool>
01177       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01178                                          mat_type& X,
01179                                          const mat_type& R,
01180                                          const ERobustness robustness)
01181       {
01182         using Teuchos::Array;
01183         using Teuchos::Copy;
01184         using Teuchos::LEFT_SIDE;
01185         using Teuchos::RIGHT_SIDE;
01186         LocalDenseMatrixOps<Scalar> ops;
01187 
01188         const int M = R.numRows();
01189         const int N = R.numCols();
01190         TEUCHOS_TEST_FOR_EXCEPTION(M < N, std::invalid_argument,
01191                            "The input matrix R has fewer columns than rows.  "
01192                            "R is " << M << " x " << N << ".");
01193         // Ignore any additional rows of R by working with a square view.
01194         mat_type R_view (Teuchos::View, R, N, N);
01195 
01196         if (side == LEFT_SIDE) {
01197           TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() < N, std::invalid_argument,
01198                              "The input/output matrix X has only "
01199                              << X.numRows() << " rows, but needs at least "
01200                              << N << " rows to match the matrix for a "
01201                              "left-side solve R \\ X.");
01202         } else if (side == RIGHT_SIDE) {
01203           TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() < N, std::invalid_argument,
01204                              "The input/output matrix X has only "
01205                              << X.numCols() << " columns, but needs at least "
01206                              << N << " columns to match the matrix for a "
01207                              "right-side solve X / R.");
01208         }
01209         TEUCHOS_TEST_FOR_EXCEPTION(robustness < ROBUSTNESS_NONE ||
01210                            robustness >= ROBUSTNESS_INVALID,
01211                            std::invalid_argument,
01212                            "Invalid robustness value " << robustness << ".");
01213 
01214         // In robust mode, scan the matrix and right-hand side(s) for
01215         // Infs and NaNs.  Only look at the upper triangle of the
01216         // matrix.
01217         if (robustness > ROBUSTNESS_NONE) {
01218           int count = ops.infNaNCount (R_view, true);
01219           TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01220                              "There " << (count != 1 ? "are" : "is")
01221                              << " " << count << " Inf or NaN entr"
01222                              << (count != 1 ? "ies" : "y")
01223                              << " in the upper triangle of R.");
01224           count = ops.infNaNCount (X, false);
01225           TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01226                              "There " << (count != 1 ? "are" : "is")
01227                              << " " << count << " Inf or NaN entr"
01228                              << (count != 1 ? "ies" : "y") << " in the "
01229                              "right-hand side(s) X.");
01230         }
01231 
01232         // Pair of values to return from this method.
01233         int rank = N;
01234         bool foundRankDeficiency = false;
01235 
01236         // Solve for X.
01237         blas_type blas;
01238 
01239         if (robustness == ROBUSTNESS_NONE) {
01240           // Fast triangular solve using the BLAS' _TRSM.  This does
01241           // no checking for rank deficiency.
01242           blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01243                     Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(),
01244                     STS::one(), R.values(), R.stride(),
01245                     X.values(), X.stride());
01246         } else if (robustness < ROBUSTNESS_INVALID) {
01247           // Save a copy of X, since X contains the right-hand side on
01248           // input.
01249           mat_type B (Copy, X, X.numRows(), X.numCols());
01250 
01251           // Fast triangular solve using the BLAS' _TRSM.  This does
01252           // no checking for rank deficiency.
01253           blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01254                     Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(),
01255                     STS::one(), R.values(), R.stride(),
01256                     X.values(), X.stride());
01257 
01258           // Check for Infs or NaNs in X.  If there are any, then
01259           // assume that TRSM failed, and use a more robust algorithm.
01260           if (ops.infNaNCount (X, false) != 0) {
01261 
01262             warn_ << "Upper triangular solve: Found Infs and/or NaNs in the "
01263               "solution after using the fast algorithm.  Retrying using a more "
01264               "robust algorithm." << std::endl;
01265 
01266             // Restore X from the copy.
01267             X.assign (B);
01268 
01269             // Find the minimum-norm solution to the least-squares
01270             // problem $\min_x \|RX - B\|_2$, using the singular value
01271             // decomposition (SVD).
01272             LocalDenseMatrixOps<Scalar> ops;
01273             if (side == LEFT_SIDE) {
01274               // _GELSS overwrites its matrix input, so make a copy.
01275               mat_type R_copy (Teuchos::Copy, R_view, N, N);
01276 
01277               // Zero out the lower triangle of R_copy, since the
01278               // mat_type constructor copies all the entries, not just
01279               // the upper triangle.  _GELSS will read all the entries
01280               // of the input matrix.
01281               ops.zeroOutStrictLowerTriangle (R_copy);
01282 
01283               // Solve the least-squares problem.
01284               rank = solveLeastSquaresUsingSVD (R_copy, X);
01285             } else {
01286               // If solving with R on the right-hand side, the interface
01287               // requires that instead of solving $\min \|XR - B\|_2$,
01288               // we have to solve $\min \|R^* X^* - B^*\|_2$.  We
01289               // compute (conjugate) transposes in newly allocated
01290               // temporary matrices X_star resp. R_star.  (B is already
01291               // in X and _GELSS overwrites its input vector X with the
01292               // solution.)
01293               mat_type X_star (X.numCols(), X.numRows());
01294               ops.conjugateTranspose (X_star, X);
01295               mat_type R_star (N, N); // Filled with zeros automatically.
01296               ops.conjugateTransposeOfUpperTriangular (R_star, R);
01297 
01298               // Solve the least-squares problem.
01299               rank = solveLeastSquaresUsingSVD (R_star, X_star);
01300 
01301               // Copy the transpose of X_star back into X.
01302               ops.conjugateTranspose (X, X_star);
01303             }
01304             if (rank < N) {
01305               foundRankDeficiency = true;
01306             }
01307           }
01308         } else {
01309           TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
01310                              "Should never get here!  Invalid robustness value "
01311                              << robustness << ".  Please report this bug to the "
01312                              "Belos developers.");
01313         }
01314         return std::make_pair (rank, foundRankDeficiency);
01315       }
01316 
01317 
01318     public:
01327       bool
01328       testGivensRotations (std::ostream& out)
01329       {
01330         using std::endl;
01331 
01332         out << "Testing Givens rotations:" << endl;
01333         Scalar x = STS::random();
01334         Scalar y = STS::random();
01335         out << "  x = " << x << ", y = " << y << endl;
01336 
01337         Scalar theCosine, theSine, result;
01338         blas_type blas;
01339         computeGivensRotation (x, y, theCosine, theSine, result);
01340         out << "-- After computing rotation:" << endl;
01341         out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01342         out << "---- x = " << x << ", y = " << y
01343             << ", result = " << result << endl;
01344 
01345         blas.ROT (1, &x, 1, &y, 1, &theCosine, &theSine);
01346         out << "-- After applying rotation:" << endl;
01347         out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01348         out << "---- x = " << x << ", y = " << y << endl;
01349 
01350         // Allow only a tiny bit of wiggle room for zeroing-out of y.
01351         if (STS::magnitude(y) > 2*STS::eps())
01352           return false;
01353         else
01354           return true;
01355       }
01356 
01386       bool
01387       testUpdateColumn (std::ostream& out,
01388                         const int numCols,
01389                         const bool testBlockGivens=false,
01390                         const bool extraVerbose=false)
01391       {
01392         using Teuchos::Array;
01393         using std::endl;
01394 
01395         TEUCHOS_TEST_FOR_EXCEPTION(numCols <= 0, std::invalid_argument,
01396                            "numCols = " << numCols << " <= 0.");
01397         const int numRows = numCols + 1;
01398 
01399         mat_type H (numRows, numCols);
01400         mat_type z (numRows, 1);
01401 
01402         mat_type R_givens (numRows, numCols);
01403         mat_type y_givens (numRows, 1);
01404         mat_type z_givens (numRows, 1);
01405         Array<Scalar> theCosines (numCols);
01406         Array<Scalar> theSines (numCols);
01407 
01408         mat_type R_blockGivens (numRows, numCols);
01409         mat_type y_blockGivens (numRows, 1);
01410         mat_type z_blockGivens (numRows, 1);
01411         Array<Scalar> blockCosines (numCols);
01412         Array<Scalar> blockSines (numCols);
01413         const int panelWidth = std::min (3, numCols);
01414 
01415         mat_type R_lapack (numRows, numCols);
01416         mat_type y_lapack (numRows, 1);
01417         mat_type z_lapack (numRows, 1);
01418 
01419         // Make a random least-squares problem.
01420         makeRandomProblem (H, z);
01421         if (extraVerbose) {
01422           printMatrix<Scalar> (out, "H", H);
01423           printMatrix<Scalar> (out, "z", z);
01424         }
01425 
01426         // Set up the right-hand side copies for each of the methods.
01427         // Each method is free to overwrite its given right-hand side.
01428         z_givens.assign (z);
01429         if (testBlockGivens) {
01430           z_blockGivens.assign (z);
01431         }
01432         z_lapack.assign (z);
01433 
01434         //
01435         // Imitate how one would update the least-squares problem in a
01436         // typical GMRES implementation, for each updating method.
01437         //
01438         // Update using Givens rotations, one at a time.
01439         magnitude_type residualNormGivens = STM::zero();
01440         for (int curCol = 0; curCol < numCols; ++curCol) {
01441           residualNormGivens = updateColumnGivens (H, R_givens, y_givens, z_givens,
01442                                                    theCosines, theSines, curCol);
01443         }
01444         solveGivens (y_givens, R_givens, z_givens, numCols-1);
01445 
01446         // Update using the "panel left-looking" Givens approach, with
01447         // the given panel width.
01448         magnitude_type residualNormBlockGivens = STM::zero();
01449         if (testBlockGivens) {
01450           const bool testBlocksAtATime = true;
01451           if (testBlocksAtATime) {
01452             // Blocks of columns at a time.
01453             for (int startCol = 0; startCol < numCols; startCol += panelWidth) {
01454               int endCol = std::min (startCol + panelWidth - 1, numCols - 1);
01455               residualNormBlockGivens =
01456                 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens,
01457                                      blockCosines, blockSines, startCol, endCol);
01458             }
01459           } else {
01460             // One column at a time.  This is good as a sanity check
01461             // to make sure updateColumnsGivens() with a single column
01462             // does the same thing as updateColumnGivens().
01463             for (int startCol = 0; startCol < numCols; ++startCol) {
01464               residualNormBlockGivens =
01465                 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens,
01466                                      blockCosines, blockSines, startCol, startCol);
01467             }
01468           }
01469           // The panel version of Givens should compute the same
01470           // cosines and sines as the non-panel version, and should
01471           // update the right-hand side z in the same way.  Thus, we
01472           // should be able to use the same triangular solver.
01473           solveGivens (y_blockGivens, R_blockGivens, z_blockGivens, numCols-1);
01474         }
01475 
01476         // Solve using LAPACK's least-squares solver.
01477         const magnitude_type residualNormLapack =
01478           solveLapack (H, R_lapack, y_lapack, z_lapack, numCols-1);
01479 
01480         // Compute the condition number of the least-squares problem.
01481         // This requires a residual, so use the residual from the
01482         // LAPACK method.  All that the method needs for an accurate
01483         // residual norm is forward stability.
01484         const magnitude_type leastSquaresCondNum =
01485           leastSquaresConditionNumber (H, z, residualNormLapack);
01486 
01487         // Compute the relative least-squares solution error for both
01488         // Givens methods.  We assume that the LAPACK solution is
01489         // "exact" and compare against the Givens rotations solution.
01490         // This is taking liberties with the definition of condition
01491         // number, but it's the best we can do, since we don't know
01492         // the exact solution and don't have an extended-precision
01493         // solver.
01494 
01495         // The solution lives only in y[0 .. numCols-1].
01496         mat_type y_givens_view (Teuchos::View, y_givens, numCols, 1);
01497         mat_type y_blockGivens_view (Teuchos::View, y_blockGivens, numCols, 1);
01498         mat_type y_lapack_view (Teuchos::View, y_lapack, numCols, 1);
01499 
01500         const magnitude_type givensSolutionError =
01501           solutionError (y_givens_view, y_lapack_view);
01502         const magnitude_type blockGivensSolutionError = testBlockGivens ?
01503           solutionError (y_blockGivens_view, y_lapack_view) :
01504           STM::zero();
01505 
01506         // If printing out the matrices, copy out the upper triangular
01507         // factors for printing.  (Both methods are free to leave data
01508         // below the lower triangle.)
01509         if (extraVerbose) {
01510           mat_type R_factorFromGivens (numCols, numCols);
01511           mat_type R_factorFromBlockGivens (numCols, numCols);
01512           mat_type R_factorFromLapack (numCols, numCols);
01513 
01514           for (int j = 0; j < numCols; ++j) {
01515             for (int i = 0; i <= j; ++i) {
01516               R_factorFromGivens(i,j) = R_givens(i,j);
01517               if (testBlockGivens) {
01518                 R_factorFromBlockGivens(i,j) = R_blockGivens(i,j);
01519               }
01520               R_factorFromLapack(i,j) = R_lapack(i,j);
01521             }
01522           }
01523 
01524           printMatrix<Scalar> (out, "R_givens", R_factorFromGivens);
01525           printMatrix<Scalar> (out, "y_givens", y_givens_view);
01526           printMatrix<Scalar> (out, "z_givens", z_givens);
01527 
01528           if (testBlockGivens) {
01529             printMatrix<Scalar> (out, "R_blockGivens", R_factorFromBlockGivens);
01530             printMatrix<Scalar> (out, "y_blockGivens", y_blockGivens_view);
01531             printMatrix<Scalar> (out, "z_blockGivens", z_blockGivens);
01532           }
01533 
01534           printMatrix<Scalar> (out, "R_lapack", R_factorFromLapack);
01535           printMatrix<Scalar> (out, "y_lapack", y_lapack_view);
01536           printMatrix<Scalar> (out, "z_lapack", z_lapack);
01537         }
01538 
01539         // Compute the (Frobenius) norm of the original matrix H.
01540         const magnitude_type H_norm = H.normFrobenius();
01541 
01542         out << "||H||_F = " << H_norm << endl;
01543 
01544         out << "||H y_givens - z||_2 / ||H||_F = "
01545             << leastSquaresResidualNorm (H, y_givens_view, z) / H_norm << endl;
01546         if (testBlockGivens) {
01547           out << "||H y_blockGivens - z||_2 / ||H||_F = "
01548               << leastSquaresResidualNorm (H, y_blockGivens_view, z) / H_norm << endl;
01549         }
01550         out << "||H y_lapack - z||_2 / ||H||_F = "
01551             << leastSquaresResidualNorm (H, y_lapack_view, z) / H_norm << endl;
01552 
01553         out << "||y_givens - y_lapack||_2 / ||y_lapack||_2 = "
01554             << givensSolutionError << endl;
01555         if (testBlockGivens) {
01556           out << "||y_blockGivens - y_lapack||_2 / ||y_lapack||_2 = "
01557               << blockGivensSolutionError << endl;
01558         }
01559 
01560         out << "Least-squares condition number = "
01561             << leastSquaresCondNum << endl;
01562 
01563         // Now for the controversial part of the test: judging whether
01564         // we succeeded.  This includes the problem's condition
01565         // number, which is a measure of the maximum perturbation in
01566         // the solution for which we can still say that the solution
01567         // is valid.  We include a little wiggle room by including a
01568         // factor proportional to the square root of the number of
01569         // floating-point operations that influence the last entry
01570         // (the conventional Wilkinsonian heuristic), times 10 for
01571         // good measure.
01572         //
01573         // (The square root looks like it has something to do with an
01574         // average-case probabilistic argument, but doesn't really.
01575         // What's an "average problem"?)
01576         const magnitude_type wiggleFactor =
01577           10 * STM::squareroot( numRows*numCols );
01578         const magnitude_type solutionErrorBoundFactor =
01579           wiggleFactor * leastSquaresCondNum;
01580         const magnitude_type solutionErrorBound =
01581           solutionErrorBoundFactor * STS::eps();
01582         out << "Solution error bound: " << solutionErrorBoundFactor
01583             << " * eps = " << solutionErrorBound << endl;
01584 
01585         // Remember that NaN is not greater than, not less than, and
01586         // not equal to any other number, including itself.  Some
01587         // compilers will rudely optimize away the "x != x" test.
01588         if (STM::isnaninf (solutionErrorBound)) {
01589           // Hm, the solution error bound is Inf or NaN.  This
01590           // probably means that the test problem was generated
01591           // incorrectly.  We should return false in this case.
01592           return false;
01593         } else { // solution error bound is finite.
01594           if (STM::isnaninf (givensSolutionError)) {
01595             return false;
01596           } else if (givensSolutionError > solutionErrorBound) {
01597             return false;
01598           } else if (testBlockGivens) {
01599             if (STM::isnaninf (blockGivensSolutionError)) {
01600               return false;
01601             } else if (blockGivensSolutionError > solutionErrorBound) {
01602               return false;
01603             } else { // Givens and Block Givens tests succeeded.
01604               return true;
01605             }
01606           } else { // Not testing block Givens; Givens test succeeded.
01607             return true;
01608           }
01609         }
01610       }
01611 
01627       bool
01628       testTriangularSolves (std::ostream& out,
01629                             const int testProblemSize,
01630                             const ERobustness robustness,
01631                             const bool verbose=false)
01632       {
01633         using Teuchos::LEFT_SIDE;
01634         using Teuchos::RIGHT_SIDE;
01635         using std::endl;
01636         typedef Teuchos::SerialDenseMatrix<int, scalar_type> mat_type;
01637 
01638         Teuchos::oblackholestream blackHole;
01639         std::ostream& verboseOut = verbose ? out : blackHole;
01640 
01641         verboseOut << "Testing upper triangular solves" << endl;
01642         //
01643         // Construct an upper triangular linear system to solve.
01644         //
01645         verboseOut << "-- Generating test matrix" << endl;
01646         const int N = testProblemSize;
01647         mat_type R (N, N);
01648         // Fill the upper triangle of R with random numbers.
01649         for (int j = 0; j < N; ++j) {
01650           for (int i = 0; i <= j; ++i) {
01651             R(i,j) = STS::random ();
01652           }
01653         }
01654         // Compute the Frobenius norm of R for later use.
01655         const magnitude_type R_norm = R.normFrobenius ();
01656         // Fill the right-hand side B with random numbers.
01657         mat_type B (N, 1);
01658         B.random ();
01659         // Compute the Frobenius norm of B for later use.
01660         const magnitude_type B_norm = B.normFrobenius ();
01661 
01662         // Save a copy of the original upper triangular system.
01663         mat_type R_copy (Teuchos::Copy, R, N, N);
01664         mat_type B_copy (Teuchos::Copy, B, N, 1);
01665 
01666         // Solution vector.
01667         mat_type X (N, 1);
01668 
01669         // Solve RX = B.
01670         verboseOut << "-- Solving RX=B" << endl;
01671         // We're ignoring the return values for now.
01672         (void) solveUpperTriangularSystem (LEFT_SIDE, X, R, B, robustness);
01673         // Test the residual error.
01674         mat_type Resid (N, 1);
01675         Resid.assign (B_copy);
01676         Belos::details::LocalDenseMatrixOps<scalar_type> ops;
01677         ops.matMatMult (STS::one(), Resid, -STS::one(), R_copy, X);
01678         verboseOut << "---- ||R*X - B||_F = " << Resid.normFrobenius() << endl;
01679         verboseOut << "---- ||R||_F ||X||_F + ||B||_F = "
01680                    << (R_norm * X.normFrobenius() + B_norm)
01681                    << endl;
01682 
01683         // Restore R and B.
01684         R.assign (R_copy);
01685         B.assign (B_copy);
01686 
01687         //
01688         // Set up a right-side test problem: YR = B^*.
01689         //
01690         mat_type Y (1, N);
01691         mat_type B_star (1, N);
01692         ops.conjugateTranspose (B_star, B);
01693         mat_type B_star_copy (1, N);
01694         B_star_copy.assign (B_star);
01695         // Solve YR = B^*.
01696         verboseOut << "-- Solving YR=B^*" << endl;
01697         // We're ignoring the return values for now.
01698         (void) solveUpperTriangularSystem (RIGHT_SIDE, Y, R, B_star, robustness);
01699         // Test the residual error.
01700         mat_type Resid2 (1, N);
01701         Resid2.assign (B_star_copy);
01702         ops.matMatMult (STS::one(), Resid2, -STS::one(), Y, R_copy);
01703         verboseOut << "---- ||Y*R - B^*||_F = " << Resid2.normFrobenius() << endl;
01704         verboseOut << "---- ||Y||_F ||R||_F + ||B^*||_F = "
01705                    << (Y.normFrobenius() * R_norm + B_norm)
01706                    << endl;
01707 
01708         // FIXME (mfh 14 Oct 2011) The test always "passes" for now;
01709         // you have to inspect the output in order to see whether it
01710         // succeeded.  We really should fix the above to use the
01711         // infinity-norm bounds in Higham's book for triangular
01712         // solves.  That would automate the test.
01713         return true;
01714       }
01715 
01716     private:
01718       std::ostream& warn_;
01719 
01724       ERobustness defaultRobustness_;
01725 
01726     private:
01738       int
01739       solveLeastSquaresUsingSVD (mat_type& A, mat_type& X)
01740       {
01741         using Teuchos::Array;
01742         LocalDenseMatrixOps<Scalar> ops;
01743 
01744         if (defaultRobustness_ > ROBUSTNESS_SOME) {
01745           int count = ops.infNaNCount (A);
01746           TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument,
01747                              "solveLeastSquaresUsingSVD: The input matrix A "
01748                              "contains " << count << "Inf and/or NaN entr"
01749                              << (count != 1 ? "ies" : "y") << ".");
01750           count = ops.infNaNCount (X);
01751           TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument,
01752                              "solveLeastSquaresUsingSVD: The input matrix X "
01753                              "contains " << count << "Inf and/or NaN entr"
01754                              << (count != 1 ? "ies" : "y") << ".");
01755         }
01756         const int N = std::min (A.numRows(), A.numCols());
01757         lapack_type lapack;
01758 
01759         // Rank of A; to be computed by _GELSS and returned.
01760         int rank = N;
01761 
01762         // Use Scalar's machine precision for the rank tolerance,
01763         // not magnitude_type's machine precision.
01764         const magnitude_type rankTolerance = STS::eps();
01765 
01766         // Array of singular values.
01767         Array<magnitude_type> singularValues (N);
01768 
01769         // Extra workspace.  This is only used by _GELSS if Scalar is
01770         // complex.  Teuchos::LAPACK presents a unified interface to
01771         // _GELSS that always includes the RWORK argument, even though
01772         // LAPACK's SGELSS and DGELSS don't have the RWORK argument.
01773         // We always allocate at least one entry so that &rwork[0]
01774         // makes sense.
01775         Array<magnitude_type> rwork (1);
01776         if (STS::isComplex) {
01777           rwork.resize (std::max (1, 5 * N));
01778         }
01779         //
01780         // Workspace query
01781         //
01782         Scalar lworkScalar = STS::one(); // To be set by workspace query
01783         int info = 0;
01784         lapack.GELSS (A.numRows(), A.numCols(), X.numCols(),
01785                       A.values(), A.stride(), X.values(), X.stride(),
01786                       &singularValues[0], rankTolerance, &rank,
01787                       &lworkScalar, -1, &rwork[0], &info);
01788         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01789                            "_GELSS workspace query returned INFO = "
01790                            << info << " != 0.");
01791         const int lwork = static_cast<int> (STS::real (lworkScalar));
01792         TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01793                            "_GELSS workspace query returned LWORK = "
01794                            << lwork << " < 0.");
01795         // Allocate workspace.  Size > 0 means &work[0] makes sense.
01796         Array<Scalar> work (std::max (1, lwork));
01797         // Solve the least-squares problem.
01798         lapack.GELSS (A.numRows(), A.numCols(), X.numCols(),
01799                       A.values(), A.stride(), X.values(), X.stride(),
01800                       &singularValues[0], rankTolerance, &rank,
01801                       &work[0], lwork, &rwork[0], &info);
01802         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::runtime_error,
01803                            "_GELSS returned INFO = " << info << " != 0.");
01804         return rank;
01805       }
01806 
01813       void
01814       solveGivens (mat_type& y,
01815                    mat_type& R,
01816                    const mat_type& z,
01817                    const int curCol)
01818       {
01819         const int numRows = curCol + 2;
01820 
01821         // Now that we have the updated R factor of H, and the updated
01822         // right-hand side z, solve the least-squares problem by
01823         // solving the upper triangular linear system Ry=z for y.
01824         const mat_type R_view (Teuchos::View, R, numRows-1, numRows-1);
01825         const mat_type z_view (Teuchos::View, z, numRows-1, z.numCols());
01826         mat_type y_view (Teuchos::View, y, numRows-1, y.numCols());
01827 
01828         (void) solveUpperTriangularSystem (Teuchos::LEFT_SIDE, y_view,
01829                                            R_view, z_view, defaultRobustness_);
01830       }
01831 
01833       void
01834       makeRandomProblem (mat_type& H, mat_type& z)
01835       {
01836         // In GMRES, z always starts out with only the first entry
01837         // being nonzero.  That entry always has nonnegative real part
01838         // and zero imaginary part, since it is the initial residual
01839         // norm.
01840         H.random ();
01841         // Zero out the entries below the subdiagonal of H, so that it
01842         // is upper Hessenberg.
01843         for (int j = 0; j < H.numCols(); ++j) {
01844           for (int i = j+2; i < H.numRows(); ++i) {
01845             H(i,j) = STS::zero();
01846           }
01847         }
01848         // Initialize z, the right-hand side of the least-squares
01849         // problem.  Make the first entry of z nonzero.
01850         {
01851           // It's still possible that a random number will come up
01852           // zero after 1000 trials, but unlikely.  Nevertheless, it's
01853           // still important not to allow an infinite loop, for
01854           // example if the pseudorandom number generator is broken
01855           // and always returns zero.
01856           const int numTrials = 1000;
01857           magnitude_type z_init = STM::zero();
01858           for (int trial = 0; trial < numTrials && z_init == STM::zero(); ++trial) {
01859             z_init = STM::random();
01860           }
01861           TEUCHOS_TEST_FOR_EXCEPTION(z_init == STM::zero(), std::runtime_error,
01862                              "After " << numTrials << " trial"
01863                              << (numTrials != 1 ? "s" : "")
01864                              << ", we were unable to generate a nonzero pseudo"
01865                              "random real number.  This most likely indicates a "
01866                              "broken pseudorandom number generator.");
01867           const magnitude_type z_first = (z_init < 0) ? -z_init : z_init;
01868 
01869           // NOTE I'm assuming here that "scalar_type = magnitude_type"
01870           // assignments make sense.
01871           z(0,0) = z_first;
01872         }
01873       }
01874 
01878       void
01879       computeGivensRotation (const Scalar& x,
01880                              const Scalar& y,
01881                              Scalar& theCosine,
01882                              Scalar& theSine,
01883                              Scalar& result)
01884       {
01885         // _LARTG, an LAPACK aux routine, is slower but more accurate
01886         // than the BLAS' _ROTG.
01887         const bool useLartg = false;
01888 
01889         if (useLartg) {
01890           lapack_type lapack;
01891           // _LARTG doesn't clobber its input arguments x and y.
01892           lapack.LARTG (x, y, &theCosine, &theSine, &result);
01893         } else {
01894           // _ROTG clobbers its first two arguments.  x is overwritten
01895           // with the result of applying the Givens rotation: [x; y] ->
01896           // [x (on output); 0].  y is overwritten with the "fast"
01897           // Givens transform (see Golub and Van Loan, 3rd ed.).
01898           Scalar x_temp = x;
01899           Scalar y_temp = y;
01900           blas_type blas;
01901           blas.ROTG (&x_temp, &y_temp, &theCosine, &theSine);
01902           result = x_temp;
01903         }
01904       }
01905 
01907       void
01908       singularValues (const mat_type& A,
01909                       Teuchos::ArrayView<magnitude_type> sigmas)
01910       {
01911         using Teuchos::Array;
01912         using Teuchos::ArrayView;
01913 
01914         const int numRows = A.numRows();
01915         const int numCols = A.numCols();
01916         TEUCHOS_TEST_FOR_EXCEPTION(sigmas.size() < std::min (numRows, numCols),
01917                            std::invalid_argument,
01918                            "The sigmas array is only of length " << sigmas.size()
01919                            << ", but must be of length at least "
01920                            << std::min (numRows, numCols)
01921                            << " in order to hold all the singular values of the "
01922                            "matrix A.");
01923 
01924         // Compute the condition number of the matrix A, using a singular
01925         // value decomposition (SVD).  LAPACK's SVD routine overwrites the
01926         // input matrix, so make a copy.
01927         mat_type A_copy (numRows, numCols);
01928         A_copy.assign (A);
01929 
01930         // Workspace query.
01931         lapack_type lapack;
01932         int info = 0;
01933         Scalar lworkScalar = STS::zero();
01934         Array<magnitude_type> rwork (std::max (std::min (numRows, numCols) - 1, 1));
01935         lapack.GESVD ('N', 'N', numRows, numCols,
01936                       A_copy.values(), A_copy.stride(), &sigmas[0],
01937                       (Scalar*) NULL, 1, (Scalar*) NULL, 1,
01938                       &lworkScalar, -1, &rwork[0], &info);
01939 
01940         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01941                            "LAPACK _GESVD workspace query failed with INFO = "
01942                            << info << ".");
01943         const int lwork = static_cast<int> (STS::real (lworkScalar));
01944         TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01945                            "LAPACK _GESVD workspace query returned LWORK = "
01946                            << lwork << " < 0.");
01947         // Make sure that the workspace array always has positive
01948         // length, so that &work[0] makes sense.
01949         Teuchos::Array<Scalar> work (std::max (1, lwork));
01950 
01951         // Compute the singular values of A.
01952         lapack.GESVD ('N', 'N', numRows, numCols,
01953                       A_copy.values(), A_copy.stride(), &sigmas[0],
01954                       (Scalar*) NULL, 1, (Scalar*) NULL, 1,
01955                       &work[0], lwork, &rwork[0], &info);
01956         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01957                            "LAPACK _GESVD failed with INFO = " << info << ".");
01958       }
01959 
01967       std::pair<magnitude_type, magnitude_type>
01968       extremeSingularValues (const mat_type& A)
01969       {
01970         using Teuchos::Array;
01971 
01972         const int numRows = A.numRows();
01973         const int numCols = A.numCols();
01974 
01975         Array<magnitude_type> sigmas (std::min (numRows, numCols));
01976         singularValues (A, sigmas);
01977         return std::make_pair (sigmas[0], sigmas[std::min(numRows, numCols) - 1]);
01978       }
01979 
01990       magnitude_type
01991       leastSquaresConditionNumber (const mat_type& A,
01992                                    const mat_type& b,
01993                                    const magnitude_type& residualNorm)
01994       {
01995         // Extreme singular values of A.
01996         const std::pair<magnitude_type, magnitude_type> sigmaMaxMin =
01997           extremeSingularValues (A);
01998 
01999         // Our solvers currently assume that H has full rank.  If the
02000         // test matrix doesn't have full rank, we stop right away.
02001         TEUCHOS_TEST_FOR_EXCEPTION(sigmaMaxMin.second == STM::zero(), std::runtime_error,
02002                            "The test matrix is rank deficient; LAPACK's _GESVD "
02003                            "routine reports that its smallest singular value is "
02004                            "zero.");
02005         // 2-norm condition number of A.  We checked above that the
02006         // denominator is nonzero.
02007         const magnitude_type A_cond = sigmaMaxMin.first / sigmaMaxMin.second;
02008 
02009         // "Theta" in the variable names below refers to the angle between
02010         // the vectors b and A*x, where x is the computed solution.  It
02011         // measures whether the residual norm is large (near ||b||) or
02012         // small (near 0).
02013         const magnitude_type sinTheta = residualNorm / b.normFrobenius();
02014 
02015         // \sin^2 \theta + \cos^2 \theta = 1.
02016         //
02017         // The range of sine is [-1,1], so squaring it won't overflow.
02018         // We still have to check whether sinTheta > 1, though.  This
02019         // is impossible in exact arithmetic, assuming that the
02020         // least-squares solver worked (b-A*0 = b and x minimizes
02021         // ||b-A*x||_2, so ||b-A*0||_2 >= ||b-A*x||_2).  However, it
02022         // might just be possible in floating-point arithmetic.  We're
02023         // just looking for an estimate, so if sinTheta > 1, we cap it
02024         // at 1.
02025         const magnitude_type cosTheta = (sinTheta > STM::one()) ?
02026           STM::zero() : STM::squareroot (1 - sinTheta * sinTheta);
02027 
02028         // This may result in Inf, if cosTheta is zero.  That's OK; in
02029         // that case, the condition number of the (full-rank)
02030         // least-squares problem is rightfully infinite.
02031         const magnitude_type tanTheta = sinTheta / cosTheta;
02032 
02033         // Condition number for the full-rank least-squares problem.
02034         return 2 * A_cond / cosTheta + tanTheta * A_cond * A_cond;
02035       }
02036 
02038       magnitude_type
02039       leastSquaresResidualNorm (const mat_type& A,
02040                                 const mat_type& x,
02041                                 const mat_type& b)
02042       {
02043         mat_type r (b.numRows(), b.numCols());
02044 
02045         // r := b - A*x
02046         r.assign (b);
02047         LocalDenseMatrixOps<Scalar> ops;
02048         ops.matMatMult (STS::one(), r, -STS::one(), A, x);
02049         return r.normFrobenius ();
02050       }
02051 
02056       magnitude_type
02057       solutionError (const mat_type& x_approx,
02058                      const mat_type& x_exact)
02059       {
02060         const int numRows = x_exact.numRows();
02061         const int numCols = x_exact.numCols();
02062 
02063         mat_type x_diff (numRows, numCols);
02064         for (int j = 0; j < numCols; ++j) {
02065           for (int i = 0; i < numRows; ++i) {
02066             x_diff(i,j) = x_exact(i,j) - x_approx(i,j);
02067           }
02068         }
02069         const magnitude_type scalingFactor = x_exact.normFrobenius();
02070 
02071         // If x_exact has zero norm, just use the absolute difference.
02072         return x_diff.normFrobenius() /
02073           (scalingFactor == STM::zero() ? STM::one() : scalingFactor);
02074       }
02075 
02122       magnitude_type
02123       updateColumnGivens (const mat_type& H,
02124                           mat_type& R,
02125                           mat_type& y,
02126                           mat_type& z,
02127                           Teuchos::ArrayView<scalar_type> theCosines,
02128                           Teuchos::ArrayView<scalar_type> theSines,
02129                           const int curCol)
02130       {
02131         using std::cerr;
02132         using std::endl;
02133 
02134         const int numRows = curCol + 2; // curCol is zero-based
02135         const int LDR = R.stride();
02136         const bool extraDebug = false;
02137 
02138         if (extraDebug) {
02139           cerr << "updateColumnGivens: curCol = " << curCol << endl;
02140         }
02141 
02142         // View of H( 1:curCol+1, curCol ) (in Matlab notation, if
02143         // curCol were a one-based index, as it would be in Matlab but
02144         // is not here).
02145         const mat_type H_col (Teuchos::View, H, numRows, 1, 0, curCol);
02146 
02147         // View of R( 1:curCol+1, curCol ) (again, in Matlab notation,
02148         // if curCol were a one-based index).
02149         mat_type R_col (Teuchos::View, R, numRows, 1, 0, curCol);
02150 
02151         // 1. Copy the current column from H into R, where it will be
02152         //    modified.
02153         R_col.assign (H_col);
02154 
02155         if (extraDebug) {
02156           printMatrix<Scalar> (cerr, "R_col before ", R_col);
02157         }
02158 
02159         // 2. Apply all the previous Givens rotations, if any, to the
02160         //    current column of the matrix.
02161         blas_type blas;
02162         for (int j = 0; j < curCol; ++j) {
02163           // BLAS::ROT really should take "const Scalar*" for these
02164           // arguments, but it wants a "Scalar*" instead, alas.
02165           Scalar theCosine = theCosines[j];
02166           Scalar theSine = theSines[j];
02167 
02168           if (extraDebug) {
02169             cerr << "  j = " << j << ": (cos,sin) = "
02170                  << theCosines[j] << "," << theSines[j] << endl;
02171           }
02172           blas.ROT (1, &R_col(j,0), LDR, &R_col(j+1,0), LDR,
02173                     &theCosine, &theSine);
02174         }
02175         if (extraDebug && curCol > 0) {
02176           printMatrix<Scalar> (cerr, "R_col after applying previous "
02177                                "Givens rotations", R_col);
02178         }
02179 
02180         // 3. Calculate new Givens rotation for R(curCol, curCol),
02181         //    R(curCol+1, curCol).
02182         Scalar theCosine, theSine, result;
02183         computeGivensRotation (R_col(curCol,0), R_col(curCol+1,0),
02184                                theCosine, theSine, result);
02185         theCosines[curCol] = theCosine;
02186         theSines[curCol] = theSine;
02187 
02188         if (extraDebug) {
02189           cerr << "  New cos,sin = " << theCosine << "," << theSine << endl;
02190         }
02191 
02192         // 4. _Apply_ the new Givens rotation.  We don't need to
02193         //    invoke _ROT here, because computeGivensRotation()
02194         //    already gives us the result: [x; y] -> [result; 0].
02195         R_col(curCol, 0) = result;
02196         R_col(curCol+1, 0) = STS::zero();
02197 
02198         if (extraDebug) {
02199           printMatrix<Scalar> (cerr, "R_col after applying current "
02200                                "Givens rotation", R_col);
02201         }
02202 
02203         // 5. Apply the resulting Givens rotation to z (the right-hand
02204         //    side of the projected least-squares problem).
02205         //
02206         // We prefer overgeneralization to undergeneralization by assuming
02207         // here that z may have more than one column.
02208         const int LDZ = z.stride();
02209         blas.ROT (z.numCols(),
02210                   &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02211                   &theCosine, &theSine);
02212 
02213         if (extraDebug) {
02214           //mat_type R_after (Teuchos::View, R, numRows, numRows-1);
02215           //printMatrix<Scalar> (cerr, "R_after", R_after);
02216           //mat_type z_after (Teuchos::View, z, numRows, z.numCols());
02217           printMatrix<Scalar> (cerr, "z_after", z);
02218         }
02219 
02220         // The last entry of z is the nonzero part of the residual of the
02221         // least-squares problem.  Its magnitude gives the residual 2-norm
02222         // of the least-squares problem.
02223         return STS::magnitude( z(numRows-1, 0) );
02224       }
02225 
02259       magnitude_type
02260       solveLapack (const mat_type& H,
02261                    mat_type& R,
02262                    mat_type& y,
02263                    mat_type& z,
02264                    const int curCol)
02265       {
02266         const int numRows = curCol + 2;
02267         const int numCols = curCol + 1;
02268         const int LDR = R.stride();
02269 
02270         // Copy H( 1:curCol+1, 1:curCol ) into R( 1:curCol+1, 1:curCol ).
02271         const mat_type H_view (Teuchos::View, H, numRows, numCols);
02272         mat_type R_view (Teuchos::View, R, numRows, numCols);
02273         R_view.assign (H_view);
02274 
02275         // The LAPACK least-squares solver overwrites the right-hand side
02276         // vector with the solution, so first copy z into y.
02277         mat_type y_view (Teuchos::View, y, numRows, y.numCols());
02278         mat_type z_view (Teuchos::View, z, numRows, y.numCols());
02279         y_view.assign (z_view);
02280 
02281         // Workspace query for the least-squares routine.
02282         int info = 0;
02283         Scalar lworkScalar = STS::zero();
02284         lapack_type lapack;
02285         lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02286                      NULL, LDR, NULL, y_view.stride(),
02287                      &lworkScalar, -1, &info);
02288         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02289                            "LAPACK _GELS workspace query failed with INFO = "
02290                            << info << ", for a " << numRows << " x " << numCols
02291                            << " matrix with " << y_view.numCols()
02292                            << " right hand side"
02293                            << ((y_view.numCols() != 1) ? "s" : "") << ".");
02294         TEUCHOS_TEST_FOR_EXCEPTION(STS::real(lworkScalar) < STM::zero(),
02295                            std::logic_error,
02296                            "LAPACK _GELS workspace query returned an LWORK with "
02297                            "negative real part: LWORK = " << lworkScalar
02298                            << ".  That should never happen.  Please report this "
02299                            "to the Belos developers.");
02300         TEUCHOS_TEST_FOR_EXCEPTION(STS::isComplex && STS::imag(lworkScalar) != STM::zero(),
02301                            std::logic_error,
02302                            "LAPACK _GELS workspace query returned an LWORK with "
02303                            "nonzero imaginary part: LWORK = " << lworkScalar
02304                            << ".  That should never happen.  Please report this "
02305                            "to the Belos developers.");
02306         // Cast workspace from Scalar to int.  Scalar may be complex,
02307         // hence the request for the real part.  Don't ask for the
02308         // magnitude, since computing the magnitude may overflow due
02309         // to squaring and square root to int.  Hopefully LAPACK
02310         // doesn't ever overflow int this way.
02311         const int lwork = std::max (1, static_cast<int> (STS::real (lworkScalar)));
02312 
02313         // Allocate workspace for solving the least-squares problem.
02314         Teuchos::Array<Scalar> work (lwork);
02315 
02316         // Solve the least-squares problem.  The ?: operator prevents
02317         // accessing the first element of the work array, if it has
02318         // length zero.
02319         lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02320                      R_view.values(), R_view.stride(),
02321                      y_view.values(), y_view.stride(),
02322                      (lwork > 0 ? &work[0] : (Scalar*) NULL),
02323                      lwork, &info);
02324 
02325         TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02326                            "Solving projected least-squares problem with LAPACK "
02327                            "_GELS failed with INFO = " << info << ", for a "
02328                            << numRows << " x " << numCols << " matrix with "
02329                            << y_view.numCols() << " right hand side"
02330                            << (y_view.numCols() != 1 ? "s" : "") << ".");
02331         // Extract the projected least-squares problem's residual error.
02332         // It's the magnitude of the last entry of y_view on output from
02333         // LAPACK's least-squares solver.
02334         return STS::magnitude( y_view(numRows-1, 0) );
02335       }
02336 
02347       magnitude_type
02348       updateColumnsGivens (const mat_type& H,
02349                            mat_type& R,
02350                            mat_type& y,
02351                            mat_type& z,
02352                            Teuchos::ArrayView<scalar_type> theCosines,
02353                            Teuchos::ArrayView<scalar_type> theSines,
02354                            const int startCol,
02355                            const int endCol)
02356       {
02357         TEUCHOS_TEST_FOR_EXCEPTION(startCol > endCol, std::invalid_argument,
02358                            "updateColumnGivens: startCol = " << startCol
02359                            << " > endCol = " << endCol << ".");
02360         magnitude_type lastResult = STM::zero();
02361         // [startCol, endCol] is an inclusive range.
02362         for (int curCol = startCol; curCol <= endCol; ++curCol) {
02363           lastResult = updateColumnGivens (H, R, y, z, theCosines, theSines, curCol);
02364         }
02365         return lastResult;
02366       }
02367 
02380       magnitude_type
02381       updateColumnsGivensBlock (const mat_type& H,
02382                                 mat_type& R,
02383                                 mat_type& y,
02384                                 mat_type& z,
02385                                 Teuchos::ArrayView<scalar_type> theCosines,
02386                                 Teuchos::ArrayView<scalar_type> theSines,
02387                                 const int startCol,
02388                                 const int endCol)
02389       {
02390         const int numRows = endCol + 2;
02391         const int numColsToUpdate = endCol - startCol + 1;
02392         const int LDR = R.stride();
02393 
02394         // 1. Copy columns [startCol, endCol] from H into R, where they
02395         //    will be modified.
02396         {
02397           const mat_type H_view (Teuchos::View, H, numRows, numColsToUpdate, 0, startCol);
02398           mat_type R_view (Teuchos::View, R, numRows, numColsToUpdate, 0, startCol);
02399           R_view.assign (H_view);
02400         }
02401 
02402         // 2. Apply all the previous Givens rotations, if any, to
02403         //    columns [startCol, endCol] of the matrix.  (Remember
02404         //    that we're using a left-looking QR factorization
02405         //    approach; we haven't yet touched those columns.)
02406         blas_type blas;
02407         for (int j = 0; j < startCol; ++j) {
02408           blas.ROT (numColsToUpdate,
02409                     &R(j, startCol), LDR, &R(j+1, startCol), LDR,
02410                     &theCosines[j], &theSines[j]);
02411         }
02412 
02413         // 3. Update each column in turn of columns [startCol, endCol].
02414         for (int curCol = startCol; curCol < endCol; ++curCol) {
02415           // a. Apply the Givens rotations computed in previous
02416           //    iterations of this loop to the current column of R.
02417           for (int j = startCol; j < curCol; ++j) {
02418             blas.ROT (1, &R(j, curCol), LDR, &R(j+1, curCol), LDR,
02419                       &theCosines[j], &theSines[j]);
02420           }
02421           // b. Calculate new Givens rotation for R(curCol, curCol),
02422           //    R(curCol+1, curCol).
02423           Scalar theCosine, theSine, result;
02424           computeGivensRotation (R(curCol, curCol), R(curCol+1, curCol),
02425                                  theCosine, theSine, result);
02426           theCosines[curCol] = theCosine;
02427           theSines[curCol] = theSine;
02428 
02429           // c. _Apply_ the new Givens rotation.  We don't need to
02430           //    invoke _ROT here, because computeGivensRotation()
02431           //    already gives us the result: [x; y] -> [result; 0].
02432           R(curCol+1, curCol) = result;
02433           R(curCol+1, curCol) = STS::zero();
02434 
02435           // d. Apply the resulting Givens rotation to z (the right-hand
02436           //    side of the projected least-squares problem).
02437           //
02438           // We prefer overgeneralization to undergeneralization by
02439           // assuming here that z may have more than one column.
02440           const int LDZ = z.stride();
02441           blas.ROT (z.numCols(),
02442                     &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02443                     &theCosine, &theSine);
02444         }
02445 
02446         // The last entry of z is the nonzero part of the residual of the
02447         // least-squares problem.  Its magnitude gives the residual 2-norm
02448         // of the least-squares problem.
02449         return STS::magnitude( z(numRows-1, 0) );
02450       }
02451     }; // class ProjectedLeastSquaresSolver
02452   } // namespace details
02453 } // namespace Belos
02454 
02455 #endif // __Belos_ProjectedLeastSquaresSolver_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines