Belos Package Browser (Single Doxygen Collection) Development
BelosProjectedLeastSquaresSolver.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 //
00004 //                 Belos: Block Linear Solvers Package
00005 //                  Copyright 2004 Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __Belos_ProjectedLeastSquaresSolver_hpp
00043 #define __Belos_ProjectedLeastSquaresSolver_hpp
00044 
00045 #include "BelosConfigDefs.hpp"
00046 #include "BelosTypes.hpp"
00047 #include "Teuchos_Array.hpp"
00048 #include "Teuchos_BLAS.hpp"
00049 #include "Teuchos_LAPACK.hpp"
00050 #include "Teuchos_oblackholestream.hpp"
00051 #include "Teuchos_ScalarTraits.hpp"
00052 #include "Teuchos_SerialDenseMatrix.hpp"
00053 #include "Teuchos_StandardParameterEntryValidators.hpp"
00054 
00058 
00059 namespace Belos {
00060 
00069   namespace details {
00070 
00071     // Anonymous namespace restricts contents to file scope.
00072     namespace {
00075       template<class Scalar>
00076       void 
00077       printMatrix (std::ostream& out, 
00078        const std::string& name,
00079        const Teuchos::SerialDenseMatrix<int, Scalar>& A)
00080       {
00081   using std::endl;
00082 
00083         const int numRows = A.numRows();
00084   const int numCols = A.numCols();
00085 
00086   out << name << " = " << endl << '[';
00087   if (numCols == 1) {
00088     // Compact form for column vectors; valid Matlab.
00089     for (int i = 0; i < numRows; ++i) {
00090       out << A(i,0);
00091       if (i < numRows-1) {
00092         out << "; ";
00093       } 
00094     }
00095   } else {
00096     for (int i = 0; i < numRows; ++i) {
00097       for (int j = 0; j < numCols; ++j) {
00098         out << A(i,j);
00099         if (j < numCols-1) {
00100     out << ", ";
00101         } else if (i < numRows-1) {
00102     out << ';' << endl;
00103         }
00104       }
00105     }
00106   }
00107   out << ']' << endl;
00108       }
00109 
00112       template<class Scalar>
00113       void 
00114       print (std::ostream& out, 
00115        const Teuchos::SerialDenseMatrix<int, Scalar>& A,
00116        const std::string& linePrefix)
00117       {
00118   using std::endl;
00119 
00120         const int numRows = A.numRows();
00121   const int numCols = A.numCols();
00122 
00123   out << linePrefix << '[';
00124   if (numCols == 1) {
00125     // Compact form for column vectors; valid Matlab.
00126     for (int i = 0; i < numRows; ++i) {
00127       out << A(i,0);
00128       if (i < numRows-1) {
00129         out << "; ";
00130       } 
00131     }
00132   } else {
00133     for (int i = 0; i < numRows; ++i) {
00134       for (int j = 0; j < numCols; ++j) {
00135         if (numRows > 1) {
00136     out << linePrefix << "  ";
00137         }
00138         out << A(i,j);
00139         if (j < numCols-1) {
00140     out << ", ";
00141         } else if (i < numRows-1) {
00142     out << ';' << endl;
00143         }
00144       }
00145     }
00146   }
00147   out << linePrefix << ']' << endl;
00148       }
00149     } // namespace (anonymous)
00150 
00157     template<class Scalar>
00158     class ProjectedLeastSquaresProblem {
00159     public:
00162       typedef Scalar scalar_type;
00165       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00166 
00174       Teuchos::SerialDenseMatrix<int,Scalar> H;
00175 
00187       Teuchos::SerialDenseMatrix<int,Scalar> R;
00188 
00199       Teuchos::SerialDenseMatrix<int,Scalar> y;
00200 
00209       Teuchos::SerialDenseMatrix<int,Scalar> z;
00210 
00220       Teuchos::Array<Scalar> theCosines;
00221 
00226       Teuchos::Array<Scalar> theSines;
00227       
00236       ProjectedLeastSquaresProblem (const int maxNumIterations) :
00237   H (maxNumIterations+1, maxNumIterations),
00238   R (maxNumIterations+1, maxNumIterations),
00239   y (maxNumIterations+1, 1),
00240   z (maxNumIterations+1, 1),
00241   theCosines (maxNumIterations+1),
00242   theSines (maxNumIterations+1)
00243       {}
00244 
00263       void 
00264       reset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta)
00265       {
00266   typedef Teuchos::ScalarTraits<Scalar> STS;
00267 
00268   // Zero out the right-hand side of the least-squares problem.
00269   z.putScalar (STS::zero());
00270 
00271   // Promote the initial residual norm from a magnitude type to
00272   // a scalar type, so we can assign it to the first entry of z.
00273   const Scalar initialResidualNorm (beta);
00274   z(0,0) = initialResidualNorm;
00275       }
00276 
00290       void 
00291       reallocateAndReset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta,
00292         const int maxNumIterations) 
00293       {
00294   typedef Teuchos::ScalarTraits<Scalar> STS;
00295   typedef Teuchos::ScalarTraits<magnitude_type> STM;
00296 
00297   TEUCHOS_TEST_FOR_EXCEPTION(beta < STM::zero(), std::invalid_argument,
00298          "ProjectedLeastSquaresProblem::reset: initial "
00299          "residual beta = " << beta << " < 0.");
00300   TEUCHOS_TEST_FOR_EXCEPTION(maxNumIterations <= 0, std::invalid_argument,
00301          "ProjectedLeastSquaresProblem::reset: maximum number "
00302          "of iterations " << maxNumIterations << " <= 0.");
00303 
00304   if (H.numRows() < maxNumIterations+1 || H.numCols() < maxNumIterations) {
00305     const int errcode = H.reshape (maxNumIterations+1, maxNumIterations);
00306     TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 
00307            "Failed to reshape H into a " << (maxNumIterations+1) 
00308            << " x " << maxNumIterations << " matrix.");
00309   }
00310   (void) H.putScalar (STS::zero());
00311 
00312   if (R.numRows() < maxNumIterations+1 || R.numCols() < maxNumIterations) {
00313     const int errcode = R.reshape (maxNumIterations+1, maxNumIterations);
00314     TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 
00315            "Failed to reshape R into a " << (maxNumIterations+1) 
00316            << " x " << maxNumIterations << " matrix.");
00317   }
00318   (void) R.putScalar (STS::zero());
00319 
00320   if (y.numRows() < maxNumIterations+1 || y.numCols() < 1) {
00321     const int errcode = y.reshape (maxNumIterations+1, 1);
00322     TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 
00323            "Failed to reshape y into a " << (maxNumIterations+1) 
00324            << " x " << 1 << " matrix.");
00325   }
00326   (void) y.putScalar (STS::zero());
00327 
00328   if (z.numRows() < maxNumIterations+1 || z.numCols() < 1) {
00329     const int errcode = z.reshape (maxNumIterations+1, 1);
00330     TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 
00331            "Failed to reshape z into a " << (maxNumIterations+1) 
00332            << " x " << 1 << " matrix.");
00333   }
00334   reset (beta);
00335       }
00336 
00337     };
00338 
00339 
00347     template<class Scalar>
00348     class LocalDenseMatrixOps {
00349     public:
00352       typedef Scalar scalar_type;
00355       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00358       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00359 
00360     private:
00361       typedef Teuchos::ScalarTraits<scalar_type> STS;
00362       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00363       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00364       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00365       
00366     public:
00368       void
00369       conjugateTranspose (mat_type& A_star, const mat_type& A) const
00370       {
00371   for (int i = 0; i < A.numRows(); ++i) {
00372     for (int j = 0; j < A.numCols(); ++j) {
00373       A_star(j,i) = STS::conjugate (A(i,j));
00374     }
00375   }
00376       }
00377 
00379       void
00380       conjugateTransposeOfUpperTriangular (mat_type& L, const mat_type& R) const
00381       {
00382   const int N = R.numCols();
00383 
00384   for (int j = 0; j < N; ++j) {
00385     for (int i = 0; i <= j; ++i) {
00386       L(j,i) = STS::conjugate (R(i,j));
00387     }
00388   }
00389       }
00390 
00392       void
00393       zeroOutStrictLowerTriangle (mat_type& A) const
00394       {
00395   const int N = std::min (A.numRows(), A.numCols());
00396 
00397   for (int j = 0; j < N; ++j) {
00398     for (int i = j+1; i < A.numRows(); ++i) {
00399       A(i,j) = STS::zero();
00400     }
00401   }
00402       }
00403 
00412       void
00413       partition (Teuchos::RCP<mat_type>& A_11, 
00414      Teuchos::RCP<mat_type>& A_21,
00415      Teuchos::RCP<mat_type>& A_12,
00416      Teuchos::RCP<mat_type>& A_22,
00417      mat_type& A, 
00418      const int numRows1, 
00419      const int numRows2, 
00420      const int numCols1, 
00421      const int numCols2)
00422       {
00423   using Teuchos::rcp;
00424   using Teuchos::View;
00425 
00426   A_11 = rcp (new mat_type (View, A, numRows1, numCols1, 0, 0));
00427   A_21 = rcp (new mat_type (View, A, numRows2, numCols1, numRows1, 0));
00428   A_12 = rcp (new mat_type (View, A, numRows1, numCols2, 0, numCols1));
00429   A_22 = rcp (new mat_type (View, A, numRows2, numCols2, numRows1, numCols1));
00430       }
00431 
00433       void
00434       matScale (mat_type& A, const scalar_type& alpha) const
00435       {
00436   const int LDA = A.stride();
00437   const int numRows = A.numRows();
00438   const int numCols = A.numCols();
00439 
00440   if (numRows == 0 || numCols == 0) {
00441     return;
00442   } else {
00443     for (int j = 0; j < numCols; ++j) {
00444       scalar_type* const A_j = &A(0,j);
00445 
00446       for (int i = 0; i < numRows; ++i) {
00447         A_j[i] *= alpha;
00448       }
00449     }
00450   }
00451       }
00452 
00457       void
00458       axpy (mat_type& Y, 
00459       const scalar_type& alpha, 
00460       const mat_type& X) const
00461       {
00462   const int numRows = Y.numRows();
00463   const int numCols = Y.numCols();
00464 
00465   TEUCHOS_TEST_FOR_EXCEPTION(numRows != X.numRows() || numCols != X.numCols(), 
00466          std::invalid_argument, "Dimensions of X and Y don't "
00467          "match.  X is " << X.numRows() << " x " << X.numCols() 
00468          << ", and Y is " << numRows << " x " << numCols << ".");
00469   for (int j = 0; j < numCols; ++j) {
00470     for (int i = 0; i < numRows; ++i) {
00471       Y(i,j) += alpha * X(i,j);
00472     }
00473   }
00474       }
00475 
00477       void 
00478       matAdd (mat_type& A, const mat_type& B) const
00479       {
00480   const int LDA = A.stride();
00481   const int LDB = B.stride();
00482   const int numRows = A.numRows();
00483   const int numCols = A.numCols();
00484 
00485   TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols,
00486          std::invalid_argument,
00487          "matAdd: The input matrices A and B have "
00488          "incompatible dimensions.  A is " << numRows 
00489          << " x " << numCols << ", but B is " << B.numRows()
00490          << " x " << B.numCols() << ".");
00491   if (numRows == 0 || numCols == 0) {
00492     return;
00493   } else {
00494     for (int j = 0; j < numCols; ++j) {
00495       scalar_type* const A_j = &A(0,j);
00496       const scalar_type* const B_j = &B(0,j);
00497 
00498       for (int i = 0; i < numRows; ++i) {
00499         A_j[i] += B_j[i];
00500       }
00501     }
00502   }
00503       }
00504 
00506       void 
00507       matSub (mat_type& A, const mat_type& B) const
00508       {
00509   const int LDA = A.stride();
00510   const int LDB = B.stride();
00511   const int numRows = A.numRows();
00512   const int numCols = A.numCols();
00513 
00514   TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols,
00515          std::invalid_argument,
00516          "matSub: The input matrices A and B have "
00517          "incompatible dimensions.  A is " << numRows 
00518          << " x " << numCols << ", but B is " << B.numRows()
00519          << " x " << B.numCols() << ".");
00520   if (numRows == 0 || numCols == 0) {
00521     return;
00522   } else {
00523     for (int j = 0; j < numCols; ++j) {
00524       scalar_type* const A_j = &A(0,j);
00525       const scalar_type* const B_j = &B(0,j);
00526 
00527       for (int i = 0; i < numRows; ++i) {
00528         A_j[i] -= B_j[i];
00529       }
00530     }
00531   }
00532       }
00533 
00538       void
00539       rightUpperTriSolve (mat_type& B,
00540         const mat_type& R) const
00541       {
00542   TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != R.numRows(),
00543          std::invalid_argument,
00544          "rightUpperTriSolve: R and B have incompatible "
00545          "dimensions.  B has " << B.numCols() << " columns, "
00546          "but R has " << R.numRows() << " rows.");
00547   blas_type blas;
00548   blas.TRSM (Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, 
00549        Teuchos::NO_TRANS, Teuchos::NON_UNIT_DIAG, 
00550        R.numCols(), B.numCols(), 
00551        STS::one(), R.values(), R.stride(), 
00552        B.values(), B.stride());
00553       }
00554 
00560       void
00561       matMatMult (const scalar_type& beta,
00562       mat_type& C, 
00563       const scalar_type& alpha,
00564       const mat_type& A, 
00565       const mat_type& B) const
00566       {
00567   using Teuchos::NO_TRANS;
00568 
00569   TEUCHOS_TEST_FOR_EXCEPTION(A.numCols() != B.numRows(),
00570          std::invalid_argument,
00571          "matMatMult: The input matrices A and B have "
00572          "incompatible dimensions.  A is " << A.numRows() 
00573          << " x " << A.numCols() << ", but B is " 
00574          << B.numRows() << " x " << B.numCols() << ".");
00575   TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != C.numRows(),
00576          std::invalid_argument,
00577          "matMatMult: The input matrix A and the output "
00578          "matrix C have incompatible dimensions.  A has " 
00579          << A.numRows() << " rows, but C has " << C.numRows()
00580          << " rows.");
00581   TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != C.numCols(),
00582          std::invalid_argument,
00583          "matMatMult: The input matrix B and the output "
00584          "matrix C have incompatible dimensions.  B has " 
00585          << B.numCols() << " columns, but C has " 
00586          << C.numCols() << " columns.");
00587   blas_type blas;
00588   blas.GEMM (NO_TRANS, NO_TRANS, C.numRows(), C.numCols(), A.numCols(), 
00589        alpha, A.values(), A.stride(), B.values(), B.stride(),
00590        beta, C.values(), C.stride());
00591       }
00592 
00600       int 
00601       infNaNCount (const mat_type& A, const bool upperTriangular=false) const
00602       {
00603   int count = 0;
00604   for (int j = 0; j < A.numCols(); ++j) {
00605     if (upperTriangular) {
00606       for (int i = 0; i <= j && i < A.numRows(); ++i) {
00607         if (STS::isnaninf (A(i,j))) {
00608     ++count;
00609         }
00610       }
00611     } else {
00612       for (int i = 0; i < A.numRows(); ++i) {
00613         if (STS::isnaninf (A(i,j))) {
00614     ++count;
00615         }
00616       }
00617     }
00618   }
00619   return count;
00620       }
00621 
00627       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00628       isUpperTriangular (const mat_type& A) const
00629       {
00630   magnitude_type lowerTri = STM::zero();
00631   magnitude_type upperTri = STM::zero();
00632   int count = 0;
00633 
00634   for (int j = 0; j < A.numCols(); ++j) {
00635     // Compute the Frobenius norm of the upper triangle /
00636     // trapezoid of A.  The second clause of the loop upper
00637     // bound is for matrices with fewer rows than columns.
00638     for (int i = 0; i <= j && i < A.numRows(); ++i) {
00639       const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00640       upperTri += A_ij_mag * A_ij_mag;
00641     }
00642     // Scan the strict lower triangle / trapezoid of A.
00643     for (int i = j+1; i < A.numRows(); ++i) {
00644       const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00645       lowerTri += A_ij_mag * A_ij_mag;
00646       if (A_ij_mag != STM::zero()) {
00647         ++count;
00648       }
00649     }
00650   }
00651   return std::make_pair (count == 0, std::make_pair (lowerTri, upperTri));
00652       }
00653 
00654 
00660       std::pair<bool, std::pair<magnitude_type, magnitude_type> >
00661       isUpperHessenberg (const mat_type& A) const
00662       {
00663   magnitude_type lower = STM::zero();
00664   magnitude_type upper = STM::zero();
00665   int count = 0;
00666 
00667   for (int j = 0; j < A.numCols(); ++j) {
00668     // Compute the Frobenius norm of the upper Hessenberg part
00669     // of A.  The second clause of the loop upper bound is for
00670     // matrices with fewer rows than columns.
00671     for (int i = 0; i <= j+1 && i < A.numRows(); ++i) {
00672       const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00673       upper += A_ij_mag * A_ij_mag;
00674     }
00675     // Scan the strict lower part of A.
00676     for (int i = j+2; i < A.numRows(); ++i) {
00677       const magnitude_type A_ij_mag = STS::magnitude (A(i,j));
00678       lower += A_ij_mag * A_ij_mag;
00679       if (A_ij_mag != STM::zero()) {
00680         ++count;
00681       }
00682     }
00683   }
00684   return std::make_pair (count == 0, std::make_pair (lower, upper));
00685       }
00686 
00693       void
00694       ensureUpperTriangular (const mat_type& A,
00695            const char* const matrixName) const
00696       {
00697   std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 
00698     isUpperTriangular (A);
00699 
00700   TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00701          "The " << A.numRows() << " x " << A.numCols() 
00702          << " matrix " << matrixName << " is not upper "
00703          "triangular.  ||tril(A)||_F = " 
00704          << result.second.first << " and ||A||_F = " 
00705          << result.second.second << ".");
00706       }
00707 
00714       void
00715       ensureUpperHessenberg (const mat_type& A,
00716            const char* const matrixName) const
00717       {
00718   std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 
00719     isUpperHessenberg (A);
00720 
00721   TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument,
00722          "The " << A.numRows() << " x " << A.numCols() 
00723          << " matrix " << matrixName << " is not upper "
00724          "triangular.  ||tril(A(2:end, :))||_F = " 
00725          << result.second.first << " and ||A||_F = " 
00726          << result.second.second << ".");
00727       }
00728 
00744       void
00745       ensureUpperHessenberg (const mat_type& A,
00746            const char* const matrixName,
00747            const magnitude_type relativeTolerance) const
00748       {
00749   std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 
00750     isUpperHessenberg (A);
00751 
00752   if (result.first) {
00753     // Mollified relative departure from upper Hessenberg.
00754     const magnitude_type err = (result.second.second == STM::zero() ? 
00755               result.second.first : 
00756               result.second.first / result.second.second);
00757     TEUCHOS_TEST_FOR_EXCEPTION(err > relativeTolerance, std::invalid_argument,
00758            "The " << A.numRows() << " x " << A.numCols() 
00759            << " matrix " << matrixName << " is not upper "
00760            "triangular.  ||tril(A(2:end, :))||_F "
00761            << (result.second.second == STM::zero() ? "" : " / ||A||_F")
00762            << " = " << err << " > " << relativeTolerance << ".");
00763   }
00764       }
00765 
00777       void
00778       ensureMinimumDimensions (const mat_type& A, 
00779              const char* const matrixName,
00780              const int minNumRows, 
00781              const int minNumCols) const
00782       {
00783   TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() < minNumRows || A.numCols() < minNumCols,
00784          std::invalid_argument,
00785          "The matrix " << matrixName << " is " << A.numRows() 
00786          << " x " << A.numCols() << ", and therefore does not "
00787          "satisfy the minimum dimensions " << minNumRows 
00788          << " x " << minNumCols << ".");
00789       }
00790 
00802       void
00803       ensureEqualDimensions (const mat_type& A, 
00804            const char* const matrixName,
00805            const int numRows, 
00806            const int numCols) const
00807       {
00808   TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != numRows || A.numCols() != numCols,
00809          std::invalid_argument,
00810          "The matrix " << matrixName << " is supposed to be " 
00811          << numRows << " x " << numCols << ", but is " 
00812          << A.numRows() << " x " << A.numCols() << " instead.");
00813       }
00814 
00815     };
00816 
00835     enum ERobustness {
00836       ROBUSTNESS_NONE,
00837       ROBUSTNESS_SOME,
00838       ROBUSTNESS_LOTS,
00839       ROBUSTNESS_INVALID
00840     };
00841 
00843     std::string
00844     robustnessEnumToString (const ERobustness x) 
00845     {
00846       const char* strings[] = {"None", "Some", "Lots"};
00847       TEUCHOS_TEST_FOR_EXCEPTION(x < ROBUSTNESS_NONE || x >= ROBUSTNESS_INVALID,
00848        std::invalid_argument,
00849        "Invalid enum value " << x << ".");
00850       return std::string (strings[x]);
00851     }
00852 
00854     ERobustness
00855     robustnessStringToEnum (const std::string& x)
00856     {
00857       const char* strings[] = {"None", "Some", "Lots"};
00858       for (int r = 0; r < static_cast<int> (ROBUSTNESS_INVALID); ++r) {
00859   if (x == strings[r]) {
00860     return static_cast<ERobustness> (r);
00861   }
00862       }
00863       TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
00864        "Invalid robustness string " << x << ".");
00865     }
00866 
00877     Teuchos::RCP<Teuchos::ParameterEntryValidator>
00878     robustnessValidator ()
00879     {
00880       using Teuchos::stringToIntegralParameterEntryValidator;
00881 
00882       Teuchos::Array<std::string> strs (3);
00883       strs[0] = robustnessEnumToString (ROBUSTNESS_NONE);
00884       strs[1] = robustnessEnumToString (ROBUSTNESS_SOME);
00885       strs[2] = robustnessEnumToString (ROBUSTNESS_LOTS);
00886       Teuchos::Array<std::string> docs (3);
00887       docs[0] = "Use the BLAS' triangular solve.  This may result in Inf or "
00888   "NaN output if the triangular matrix is rank deficient.";
00889       docs[1] = "Robustness somewhere between \"None\" and \"Lots\".";
00890       docs[2] = "Solve the triangular system in a least-squares sense, using "
00891   "an SVD-based algorithm.  This will always succeed, though the "
00892   "solution may not make sense for GMRES.";
00893       Teuchos::Array<ERobustness> ints (3);
00894       ints[0] = ROBUSTNESS_NONE;
00895       ints[1] = ROBUSTNESS_SOME;
00896       ints[2] = ROBUSTNESS_LOTS;
00897       const std::string pname ("Robustness of Projected Least-Squares Solve");
00898 
00899       return stringToIntegralParameterEntryValidator<ERobustness> (strs, docs,
00900                    ints, pname);
00901     }
00902     
00965     template<class Scalar>
00966     class ProjectedLeastSquaresSolver {
00967     public:
00974       typedef Scalar scalar_type;
00979       typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type;
00982       typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type;
00983 
00984     private:
00985       typedef Teuchos::ScalarTraits<scalar_type> STS;
00986       typedef Teuchos::ScalarTraits<magnitude_type> STM;
00987       typedef Teuchos::BLAS<int, scalar_type> blas_type;
00988       typedef Teuchos::LAPACK<int, scalar_type> lapack_type;
00989 
00990     public:
01004       ProjectedLeastSquaresSolver (std::ostream& warnStream,
01005            const ERobustness defaultRobustness=ROBUSTNESS_NONE) :
01006   warn_ (warnStream),
01007   defaultRobustness_ (defaultRobustness) 
01008       {}
01009 
01023       magnitude_type
01024       updateColumn (ProjectedLeastSquaresProblem<Scalar>& problem,
01025         const int curCol) 
01026       {
01027   return updateColumnGivens (problem.H, problem.R, problem.y, problem.z,
01028            problem.theCosines, problem.theSines, curCol);
01029       }
01030 
01045       magnitude_type
01046       updateColumns (ProjectedLeastSquaresProblem<Scalar>& problem,
01047          const int startCol,
01048          const int endCol) 
01049       {
01050   return updateColumnsGivens (problem.H, problem.R, problem.y, problem.z,
01051             problem.theCosines, problem.theSines, 
01052             startCol, endCol);
01053       }
01054 
01067       void
01068       solve (ProjectedLeastSquaresProblem<Scalar>& problem,
01069        const int curCol) 
01070       {
01071   solveGivens (problem.y, problem.R, problem.z, curCol);
01072       }
01073 
01078       std::pair<int, bool>
01079       solveUpperTriangularSystem (Teuchos::ESide side,
01080           mat_type& X,
01081           const mat_type& R,
01082           const mat_type& B)
01083       {
01084   return solveUpperTriangularSystem (side, X, R, B, defaultRobustness_);
01085       }
01086 
01125       std::pair<int, bool>
01126       solveUpperTriangularSystem (Teuchos::ESide side,
01127           mat_type& X,
01128           const mat_type& R,
01129           const mat_type& B,
01130           const ERobustness robustness)
01131       {
01132   TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() != B.numRows(), std::invalid_argument,
01133          "The output X and right-hand side B have different "
01134          "numbers of rows.  X has " << X.numRows() << " rows"
01135          ", and B has " << B.numRows() << " rows.");
01136   // If B has more columns than X, we ignore the remaining
01137   // columns of B when solving the upper triangular system.  If
01138   // B has _fewer_ columns than X, we can't solve for all the
01139   // columns of X, so we throw an exception.
01140   TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() > B.numCols(), std::invalid_argument,
01141          "The output X has more columns than the "
01142          "right-hand side B.  X has " << X.numCols() 
01143          << " columns and B has " << B.numCols() 
01144          << " columns.");
01145   // See above explaining the number of columns in B_view.
01146   mat_type B_view (Teuchos::View, B, B.numRows(), X.numCols());
01147 
01148   // Both the BLAS' _TRSM and LAPACK's _LATRS overwrite the
01149   // right-hand side with the solution, so first copy B_view
01150   // into X.
01151   X.assign (B_view);
01152 
01153   // Solve the upper triangular system.
01154   return solveUpperTriangularSystemInPlace (side, X, R, robustness);
01155       }
01156 
01161       std::pair<int, bool>
01162       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01163            mat_type& X,
01164            const mat_type& R)
01165       {
01166   return solveUpperTriangularSystemInPlace (side, X, R, defaultRobustness_);
01167       }
01168 
01176       std::pair<int, bool>
01177       solveUpperTriangularSystemInPlace (Teuchos::ESide side,
01178            mat_type& X,
01179            const mat_type& R,
01180            const ERobustness robustness)
01181       {
01182   using Teuchos::Array;
01183   using Teuchos::Copy;
01184   using Teuchos::LEFT_SIDE;
01185   using Teuchos::RIGHT_SIDE;
01186   LocalDenseMatrixOps<Scalar> ops;
01187 
01188   const int M = R.numRows();
01189   const int N = R.numCols();
01190   TEUCHOS_TEST_FOR_EXCEPTION(M < N, std::invalid_argument, 
01191          "The input matrix R has fewer columns than rows.  "
01192          "R is " << M << " x " << N << ".");
01193   // Ignore any additional rows of R by working with a square view.
01194   mat_type R_view (Teuchos::View, R, N, N);
01195 
01196   if (side == LEFT_SIDE) {
01197     TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() < N, std::invalid_argument,
01198            "The input/output matrix X has only "
01199            << X.numRows() << " rows, but needs at least " 
01200            << N << " rows to match the matrix for a "
01201            "left-side solve R \\ X.");
01202   } else if (side == RIGHT_SIDE) {
01203     TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() < N, std::invalid_argument,
01204            "The input/output matrix X has only "
01205            << X.numCols() << " columns, but needs at least " 
01206            << N << " columns to match the matrix for a "
01207            "right-side solve X / R.");
01208   }
01209   TEUCHOS_TEST_FOR_EXCEPTION(robustness < ROBUSTNESS_NONE || 
01210          robustness >= ROBUSTNESS_INVALID, 
01211          std::invalid_argument,
01212          "Invalid robustness value " << robustness << ".");
01213 
01214   // In robust mode, scan the matrix and right-hand side(s) for
01215   // Infs and NaNs.  Only look at the upper triangle of the
01216   // matrix.
01217   if (robustness > ROBUSTNESS_NONE) {
01218     int count = ops.infNaNCount (R_view, true);
01219     TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01220            "There " << (count != 1 ? "are" : "is")
01221            << " " << count << " Inf or NaN entr"
01222            << (count != 1 ? "ies" : "y") 
01223            << " in the upper triangle of R.");
01224     count = ops.infNaNCount (X, false);
01225     TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error,
01226            "There " << (count != 1 ? "are" : "is")
01227            << " " << count << " Inf or NaN entr"
01228            << (count != 1 ? "ies" : "y") << " in the "
01229            "right-hand side(s) X.");
01230   }
01231 
01232   // Pair of values to return from this method.
01233   int rank = N;
01234   bool foundRankDeficiency = false;
01235 
01236   // Solve for X.
01237   blas_type blas;
01238 
01239   if (robustness == ROBUSTNESS_NONE) {
01240     // Fast triangular solve using the BLAS' _TRSM.  This does
01241     // no checking for rank deficiency.
01242     blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01243         Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(), 
01244         STS::one(), R.values(), R.stride(), 
01245         X.values(), X.stride());
01246   } else if (robustness < ROBUSTNESS_INVALID) {
01247     // Save a copy of X, since X contains the right-hand side on
01248     // input.
01249     mat_type B (Copy, X, X.numRows(), X.numCols());
01250 
01251     // Fast triangular solve using the BLAS' _TRSM.  This does
01252     // no checking for rank deficiency.
01253     blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
01254         Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(), 
01255         STS::one(), R.values(), R.stride(), 
01256         X.values(), X.stride());
01257 
01258     // Check for Infs or NaNs in X.  If there are any, then
01259     // assume that TRSM failed, and use a more robust algorithm.
01260     if (ops.infNaNCount (X, false) != 0) {
01261 
01262       warn_ << "Upper triangular solve: Found Infs and/or NaNs in the "
01263         "solution after using the fast algorithm.  Retrying using a more "
01264         "robust algorithm." << std::endl;
01265 
01266       // Restore X from the copy.
01267       X.assign (B);
01268 
01269       // Find the minimum-norm solution to the least-squares
01270       // problem $\min_x \|RX - B\|_2$, using the singular value
01271       // decomposition (SVD).
01272       LocalDenseMatrixOps<Scalar> ops;
01273       if (side == LEFT_SIDE) {
01274         // _GELSS overwrites its matrix input, so make a copy.
01275         mat_type R_copy (Teuchos::Copy, R_view, N, N);
01276 
01277         // Zero out the lower triangle of R_copy, since the
01278         // mat_type constructor copies all the entries, not just
01279         // the upper triangle.  _GELSS will read all the entries
01280         // of the input matrix.
01281         ops.zeroOutStrictLowerTriangle (R_copy);
01282 
01283         // Solve the least-squares problem.
01284         rank = solveLeastSquaresUsingSVD (R_copy, X);
01285       } else {
01286         // If solving with R on the right-hand side, the interface
01287         // requires that instead of solving $\min \|XR - B\|_2$,
01288         // we have to solve $\min \|R^* X^* - B^*\|_2$.  We
01289         // compute (conjugate) transposes in newly allocated
01290         // temporary matrices X_star resp. R_star.  (B is already
01291         // in X and _GELSS overwrites its input vector X with the
01292         // solution.)
01293         mat_type X_star (X.numCols(), X.numRows());
01294         ops.conjugateTranspose (X_star, X);
01295         mat_type R_star (N, N); // Filled with zeros automatically.
01296         ops.conjugateTransposeOfUpperTriangular (R_star, R);
01297 
01298         // Solve the least-squares problem.
01299         rank = solveLeastSquaresUsingSVD (R_star, X_star);
01300 
01301         // Copy the transpose of X_star back into X.
01302         ops.conjugateTranspose (X, X_star);
01303       }
01304       if (rank < N) {
01305         foundRankDeficiency = true;
01306       }
01307     }
01308   } else {
01309     TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 
01310            "Should never get here!  Invalid robustness value " 
01311            << robustness << ".  Please report this bug to the "
01312            "Belos developers.");
01313   }
01314   return std::make_pair (rank, foundRankDeficiency);
01315       }
01316 
01317 
01318     public:
01327       bool
01328       testGivensRotations (std::ostream& out)
01329       {
01330   using std::endl;
01331 
01332   out << "Testing Givens rotations:" << endl;
01333   Scalar x = STS::random();
01334   Scalar y = STS::random();
01335   out << "  x = " << x << ", y = " << y << endl;
01336 
01337   Scalar theCosine, theSine, result;
01338   blas_type blas;
01339   computeGivensRotation (x, y, theCosine, theSine, result);
01340   out << "-- After computing rotation:" << endl;
01341   out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01342   out << "---- x = " << x << ", y = " << y 
01343       << ", result = " << result << endl;
01344 
01345   blas.ROT (1, &x, 1, &y, 1, &theCosine, &theSine);
01346   out << "-- After applying rotation:" << endl;
01347   out << "---- cos,sin = " << theCosine << "," << theSine << endl;
01348   out << "---- x = " << x << ", y = " << y << endl;
01349 
01350   // Allow only a tiny bit of wiggle room for zeroing-out of y.
01351   if (STS::magnitude(y) > 2*STS::eps())
01352     return false;
01353   else 
01354     return true;
01355       }
01356 
01386       bool
01387       testUpdateColumn (std::ostream& out, 
01388       const int numCols,
01389       const bool testBlockGivens=false,
01390       const bool extraVerbose=false) 
01391       {
01392   using Teuchos::Array;
01393   using std::endl;
01394   
01395   TEUCHOS_TEST_FOR_EXCEPTION(numCols <= 0, std::invalid_argument,
01396          "numCols = " << numCols << " <= 0.");
01397   const int numRows = numCols + 1;
01398 
01399   mat_type H (numRows, numCols);
01400   mat_type z (numRows, 1);
01401 
01402   mat_type R_givens (numRows, numCols);
01403   mat_type y_givens (numRows, 1);
01404   mat_type z_givens (numRows, 1);
01405   Array<Scalar> theCosines (numCols);
01406   Array<Scalar> theSines (numCols);
01407 
01408   mat_type R_blockGivens (numRows, numCols);
01409   mat_type y_blockGivens (numRows, 1);
01410   mat_type z_blockGivens (numRows, 1);
01411   Array<Scalar> blockCosines (numCols);
01412   Array<Scalar> blockSines (numCols);
01413   const int panelWidth = std::min (3, numCols);
01414 
01415   mat_type R_lapack (numRows, numCols);
01416   mat_type y_lapack (numRows, 1);
01417   mat_type z_lapack (numRows, 1);
01418 
01419   // Make a random least-squares problem.  
01420   makeRandomProblem (H, z);
01421   if (extraVerbose) {
01422     printMatrix<Scalar> (out, "H", H);
01423     printMatrix<Scalar> (out, "z", z);
01424   }
01425 
01426   // Set up the right-hand side copies for each of the methods.
01427   // Each method is free to overwrite its given right-hand side.
01428   z_givens.assign (z);
01429   if (testBlockGivens) {
01430     z_blockGivens.assign (z);
01431   }
01432   z_lapack.assign (z);
01433 
01434   //
01435   // Imitate how one would update the least-squares problem in a
01436   // typical GMRES implementation, for each updating method.
01437   //
01438   // Update using Givens rotations, one at a time.
01439   magnitude_type residualNormGivens = STM::zero();
01440   for (int curCol = 0; curCol < numCols; ++curCol) {
01441     residualNormGivens = updateColumnGivens (H, R_givens, y_givens, z_givens, 
01442                theCosines, theSines, curCol);
01443   }
01444   solveGivens (y_givens, R_givens, z_givens, numCols-1);
01445 
01446   // Update using the "panel left-looking" Givens approach, with
01447   // the given panel width.
01448   magnitude_type residualNormBlockGivens = STM::zero();
01449   if (testBlockGivens) {
01450     const bool testBlocksAtATime = true;
01451     if (testBlocksAtATime) {
01452       // Blocks of columns at a time.
01453       for (int startCol = 0; startCol < numCols; startCol += panelWidth) {
01454         int endCol = std::min (startCol + panelWidth - 1, numCols - 1);
01455         residualNormBlockGivens = 
01456     updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens, 
01457              blockCosines, blockSines, startCol, endCol);
01458       }
01459     } else {
01460       // One column at a time.  This is good as a sanity check
01461       // to make sure updateColumnsGivens() with a single column
01462       // does the same thing as updateColumnGivens().
01463       for (int startCol = 0; startCol < numCols; ++startCol) {
01464         residualNormBlockGivens = 
01465     updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens, 
01466              blockCosines, blockSines, startCol, startCol);
01467       }
01468     }
01469     // The panel version of Givens should compute the same
01470     // cosines and sines as the non-panel version, and should
01471     // update the right-hand side z in the same way.  Thus, we
01472     // should be able to use the same triangular solver.
01473     solveGivens (y_blockGivens, R_blockGivens, z_blockGivens, numCols-1);
01474   }
01475 
01476   // Solve using LAPACK's least-squares solver.
01477   const magnitude_type residualNormLapack = 
01478     solveLapack (H, R_lapack, y_lapack, z_lapack, numCols-1);
01479 
01480   // Compute the condition number of the least-squares problem.
01481   // This requires a residual, so use the residual from the
01482   // LAPACK method.  All that the method needs for an accurate
01483   // residual norm is forward stability.
01484   const magnitude_type leastSquaresCondNum = 
01485     leastSquaresConditionNumber (H, z, residualNormLapack);
01486 
01487   // Compute the relative least-squares solution error for both
01488   // Givens methods.  We assume that the LAPACK solution is
01489   // "exact" and compare against the Givens rotations solution.
01490   // This is taking liberties with the definition of condition
01491   // number, but it's the best we can do, since we don't know
01492   // the exact solution and don't have an extended-precision
01493   // solver.
01494 
01495   // The solution lives only in y[0 .. numCols-1].
01496   mat_type y_givens_view (Teuchos::View, y_givens, numCols, 1);
01497   mat_type y_blockGivens_view (Teuchos::View, y_blockGivens, numCols, 1);
01498   mat_type y_lapack_view (Teuchos::View, y_lapack, numCols, 1);
01499 
01500   const magnitude_type givensSolutionError = 
01501     solutionError (y_givens_view, y_lapack_view);
01502   const magnitude_type blockGivensSolutionError = testBlockGivens ?
01503     solutionError (y_blockGivens_view, y_lapack_view) : 
01504     STM::zero();
01505 
01506   // If printing out the matrices, copy out the upper triangular
01507   // factors for printing.  (Both methods are free to leave data
01508   // below the lower triangle.)
01509   if (extraVerbose) {
01510     mat_type R_factorFromGivens (numCols, numCols);
01511     mat_type R_factorFromBlockGivens (numCols, numCols);
01512     mat_type R_factorFromLapack (numCols, numCols);
01513 
01514     for (int j = 0; j < numCols; ++j) {
01515       for (int i = 0; i <= j; ++i) {
01516         R_factorFromGivens(i,j) = R_givens(i,j);
01517         if (testBlockGivens) {
01518     R_factorFromBlockGivens(i,j) = R_blockGivens(i,j);
01519         }
01520         R_factorFromLapack(i,j) = R_lapack(i,j);
01521       }
01522     }
01523 
01524     printMatrix<Scalar> (out, "R_givens", R_factorFromGivens);
01525     printMatrix<Scalar> (out, "y_givens", y_givens_view);
01526     printMatrix<Scalar> (out, "z_givens", z_givens);
01527       
01528     if (testBlockGivens) {
01529       printMatrix<Scalar> (out, "R_blockGivens", R_factorFromBlockGivens);
01530       printMatrix<Scalar> (out, "y_blockGivens", y_blockGivens_view);
01531       printMatrix<Scalar> (out, "z_blockGivens", z_blockGivens);
01532     }
01533       
01534     printMatrix<Scalar> (out, "R_lapack", R_factorFromLapack);
01535     printMatrix<Scalar> (out, "y_lapack", y_lapack_view);
01536     printMatrix<Scalar> (out, "z_lapack", z_lapack);
01537   }
01538 
01539   // Compute the (Frobenius) norm of the original matrix H.
01540   const magnitude_type H_norm = H.normFrobenius();
01541 
01542   out << "||H||_F = " << H_norm << endl;
01543 
01544   out << "||H y_givens - z||_2 / ||H||_F = " 
01545       << leastSquaresResidualNorm (H, y_givens_view, z) / H_norm << endl;
01546   if (testBlockGivens) {
01547     out << "||H y_blockGivens - z||_2 / ||H||_F = " 
01548         << leastSquaresResidualNorm (H, y_blockGivens_view, z) / H_norm << endl;
01549   }
01550   out << "||H y_lapack - z||_2 / ||H||_F = " 
01551       << leastSquaresResidualNorm (H, y_lapack_view, z) / H_norm << endl;
01552 
01553   out << "||y_givens - y_lapack||_2 / ||y_lapack||_2 = " 
01554       << givensSolutionError << endl;
01555   if (testBlockGivens) {
01556     out << "||y_blockGivens - y_lapack||_2 / ||y_lapack||_2 = " 
01557         << blockGivensSolutionError << endl;
01558   }
01559 
01560   out << "Least-squares condition number = " 
01561       << leastSquaresCondNum << endl;
01562 
01563   // Now for the controversial part of the test: judging whether
01564   // we succeeded.  This includes the problem's condition
01565   // number, which is a measure of the maximum perturbation in
01566   // the solution for which we can still say that the solution
01567   // is valid.  We include a little wiggle room by including a
01568   // factor proportional to the square root of the number of
01569   // floating-point operations that influence the last entry
01570   // (the conventional Wilkinsonian heuristic), times 10 for
01571   // good measure.
01572   //
01573   // (The square root looks like it has something to do with an
01574   // average-case probabilistic argument, but doesn't really.
01575   // What's an "average problem"?)
01576   const magnitude_type wiggleFactor = 
01577     10 * STM::squareroot( numRows*numCols );
01578   const magnitude_type solutionErrorBoundFactor = 
01579     wiggleFactor * leastSquaresCondNum;
01580   const magnitude_type solutionErrorBound = 
01581     solutionErrorBoundFactor * STS::eps();
01582   out << "Solution error bound: " << solutionErrorBoundFactor 
01583       << " * eps = " << solutionErrorBound << endl;
01584 
01585   // Remember that NaN is not greater than, not less than, and
01586   // not equal to any other number, including itself.  Some
01587   // compilers will rudely optimize away the "x != x" test.
01588   if (STM::isnaninf (solutionErrorBound)) {
01589     // Hm, the solution error bound is Inf or NaN.  This
01590     // probably means that the test problem was generated
01591     // incorrectly.  We should return false in this case.
01592     return false; 
01593   } else { // solution error bound is finite.
01594     if (STM::isnaninf (givensSolutionError)) {
01595       return false; 
01596     } else if (givensSolutionError > solutionErrorBound) {
01597       return false;
01598     } else if (testBlockGivens) {
01599       if (STM::isnaninf (blockGivensSolutionError)) {
01600         return false; 
01601       } else if (blockGivensSolutionError > solutionErrorBound) {
01602         return false;
01603       } else { // Givens and Block Givens tests succeeded.
01604         return true;
01605       }
01606     } else { // Not testing block Givens; Givens test succeeded.
01607       return true;
01608     }
01609   }
01610       }
01611 
01627       bool 
01628       testTriangularSolves (std::ostream& out,
01629           const int testProblemSize,
01630           const ERobustness robustness,
01631           const bool verbose=false)
01632       {
01633   using Teuchos::LEFT_SIDE;
01634   using Teuchos::RIGHT_SIDE;
01635   using std::endl;
01636   typedef Teuchos::SerialDenseMatrix<int, scalar_type> mat_type;
01637 
01638   Teuchos::oblackholestream blackHole;
01639   std::ostream& verboseOut = verbose ? out : blackHole;
01640 
01641   verboseOut << "Testing upper triangular solves" << endl;
01642   //
01643   // Construct an upper triangular linear system to solve.
01644   //
01645   verboseOut << "-- Generating test matrix" << endl;
01646   const int N = testProblemSize;
01647   mat_type R (N, N);
01648   // Fill the upper triangle of R with random numbers.
01649   for (int j = 0; j < N; ++j) {
01650     for (int i = 0; i <= j; ++i) {
01651       R(i,j) = STS::random ();
01652     }
01653   }
01654   // Compute the Frobenius norm of R for later use.
01655   const magnitude_type R_norm = R.normFrobenius ();
01656   // Fill the right-hand side B with random numbers.
01657   mat_type B (N, 1);
01658   B.random ();
01659   // Compute the Frobenius norm of B for later use.
01660   const magnitude_type B_norm = B.normFrobenius ();
01661 
01662   // Save a copy of the original upper triangular system.
01663   mat_type R_copy (Teuchos::Copy, R, N, N);
01664   mat_type B_copy (Teuchos::Copy, B, N, 1);
01665 
01666   // Solution vector.
01667   mat_type X (N, 1);
01668 
01669   // Solve RX = B.
01670   verboseOut << "-- Solving RX=B" << endl;
01671   // We're ignoring the return values for now.
01672   (void) solveUpperTriangularSystem (LEFT_SIDE, X, R, B, robustness);
01673   // Test the residual error.
01674   mat_type Resid (N, 1);
01675   Resid.assign (B_copy);
01676   Belos::details::LocalDenseMatrixOps<scalar_type> ops;
01677   ops.matMatMult (STS::one(), Resid, -STS::one(), R_copy, X);
01678   verboseOut << "---- ||R*X - B||_F = " << Resid.normFrobenius() << endl;
01679   verboseOut << "---- ||R||_F ||X||_F + ||B||_F = " 
01680        << (R_norm * X.normFrobenius() + B_norm)
01681        << endl;
01682 
01683   // Restore R and B.
01684   R.assign (R_copy);
01685   B.assign (B_copy);
01686 
01687   // 
01688   // Set up a right-side test problem: YR = B^*.
01689   //
01690   mat_type Y (1, N);
01691   mat_type B_star (1, N);
01692   ops.conjugateTranspose (B_star, B);
01693   mat_type B_star_copy (1, N);
01694   B_star_copy.assign (B_star);
01695   // Solve YR = B^*.
01696   verboseOut << "-- Solving YR=B^*" << endl;
01697   // We're ignoring the return values for now.
01698   (void) solveUpperTriangularSystem (RIGHT_SIDE, Y, R, B_star, robustness);
01699   // Test the residual error.
01700   mat_type Resid2 (1, N);
01701   Resid2.assign (B_star_copy);
01702   ops.matMatMult (STS::one(), Resid2, -STS::one(), Y, R_copy);
01703   verboseOut << "---- ||Y*R - B^*||_F = " << Resid2.normFrobenius() << endl;
01704   verboseOut << "---- ||Y||_F ||R||_F + ||B^*||_F = " 
01705        << (Y.normFrobenius() * R_norm + B_norm)
01706        << endl;
01707 
01708   // FIXME (mfh 14 Oct 2011) The test always "passes" for now;
01709   // you have to inspect the output in order to see whether it
01710   // succeeded.  We really should fix the above to use the
01711   // infinity-norm bounds in Higham's book for triangular
01712   // solves.  That would automate the test.
01713   return true;
01714       }
01715 
01716     private:
01718       std::ostream& warn_;
01719 
01724       ERobustness defaultRobustness_;
01725 
01726     private:
01738       int 
01739       solveLeastSquaresUsingSVD (mat_type& A, mat_type& X)
01740       {
01741   using Teuchos::Array;
01742   LocalDenseMatrixOps<Scalar> ops;
01743 
01744   if (defaultRobustness_ > ROBUSTNESS_SOME) {
01745     int count = ops.infNaNCount (A);
01746     TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument, 
01747            "solveLeastSquaresUsingSVD: The input matrix A "
01748            "contains " << count << "Inf and/or NaN entr" 
01749            << (count != 1 ? "ies" : "y") << ".");
01750     count = ops.infNaNCount (X);
01751     TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument, 
01752            "solveLeastSquaresUsingSVD: The input matrix X "
01753            "contains " << count << "Inf and/or NaN entr" 
01754            << (count != 1 ? "ies" : "y") << ".");
01755   }
01756   const int N = std::min (A.numRows(), A.numCols());
01757   lapack_type lapack;
01758 
01759   // Rank of A; to be computed by _GELSS and returned.
01760   int rank = N;
01761 
01762   // Use Scalar's machine precision for the rank tolerance,
01763   // not magnitude_type's machine precision.
01764   const magnitude_type rankTolerance = STS::eps();
01765 
01766   // Array of singular values.
01767   Array<magnitude_type> singularValues (N);
01768 
01769   // Extra workspace.  This is only used by _GELSS if Scalar is
01770   // complex.  Teuchos::LAPACK presents a unified interface to
01771   // _GELSS that always includes the RWORK argument, even though
01772   // LAPACK's SGELSS and DGELSS don't have the RWORK argument.
01773   // We always allocate at least one entry so that &rwork[0]
01774   // makes sense.
01775   Array<magnitude_type> rwork (1);
01776   if (STS::isComplex) {
01777     rwork.resize (std::max (1, 5 * N));
01778   }
01779   //
01780   // Workspace query
01781   //
01782   Scalar lworkScalar = STS::one(); // To be set by workspace query
01783   int info = 0;
01784   lapack.GELSS (A.numRows(), A.numCols(), X.numCols(), 
01785           A.values(), A.stride(), X.values(), X.stride(),
01786           &singularValues[0], rankTolerance, &rank,
01787           &lworkScalar, -1, &rwork[0], &info);
01788   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01789          "_GELSS workspace query returned INFO = " 
01790          << info << " != 0.");
01791   const int lwork = static_cast<int> (STS::real (lworkScalar));
01792   TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01793          "_GELSS workspace query returned LWORK = " 
01794          << lwork << " < 0.");
01795   // Allocate workspace.  Size > 0 means &work[0] makes sense.
01796   Array<Scalar> work (std::max (1, lwork));
01797   // Solve the least-squares problem.
01798   lapack.GELSS (A.numRows(), A.numCols(), X.numCols(), 
01799           A.values(), A.stride(), X.values(), X.stride(),
01800           &singularValues[0], rankTolerance, &rank,
01801           &work[0], lwork, &rwork[0], &info);
01802   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::runtime_error,
01803          "_GELSS returned INFO = " << info << " != 0.");
01804   return rank;
01805       }
01806 
01813       void
01814       solveGivens (mat_type& y, 
01815        mat_type& R, 
01816        const mat_type& z, 
01817        const int curCol)
01818       {
01819   const int numRows = curCol + 2;
01820 
01821   // Now that we have the updated R factor of H, and the updated
01822   // right-hand side z, solve the least-squares problem by
01823   // solving the upper triangular linear system Ry=z for y.
01824   const mat_type R_view (Teuchos::View, R, numRows-1, numRows-1);
01825   const mat_type z_view (Teuchos::View, z, numRows-1, z.numCols());
01826   mat_type y_view (Teuchos::View, y, numRows-1, y.numCols());
01827 
01828   (void) solveUpperTriangularSystem (Teuchos::LEFT_SIDE, y_view, 
01829              R_view, z_view, defaultRobustness_);
01830       }
01831 
01833       void
01834       makeRandomProblem (mat_type& H, mat_type& z) 
01835       {
01836   // In GMRES, z always starts out with only the first entry
01837   // being nonzero.  That entry always has nonnegative real part
01838   // and zero imaginary part, since it is the initial residual
01839   // norm.
01840   H.random ();
01841   // Zero out the entries below the subdiagonal of H, so that it
01842   // is upper Hessenberg.
01843   for (int j = 0; j < H.numCols(); ++j) {
01844     for (int i = j+2; i < H.numRows(); ++i) {
01845       H(i,j) = STS::zero();
01846     }
01847   }
01848   // Initialize z, the right-hand side of the least-squares
01849   // problem.  Make the first entry of z nonzero.
01850   {
01851     // It's still possible that a random number will come up
01852     // zero after 1000 trials, but unlikely.  Nevertheless, it's
01853     // still important not to allow an infinite loop, for
01854     // example if the pseudorandom number generator is broken
01855     // and always returns zero.
01856     const int numTrials = 1000;
01857     magnitude_type z_init = STM::zero();
01858     for (int trial = 0; trial < numTrials && z_init == STM::zero(); ++trial) {
01859       z_init = STM::random();
01860     }
01861     TEUCHOS_TEST_FOR_EXCEPTION(z_init == STM::zero(), std::runtime_error,
01862            "After " << numTrials << " trial" 
01863            << (numTrials != 1 ? "s" : "") 
01864            << ", we were unable to generate a nonzero pseudo"
01865            "random real number.  This most likely indicates a "
01866            "broken pseudorandom number generator.");
01867     const magnitude_type z_first = (z_init < 0) ? -z_init : z_init;
01868 
01869     // NOTE I'm assuming here that "scalar_type = magnitude_type"
01870     // assignments make sense.
01871     z(0,0) = z_first;
01872   }
01873       }
01874 
01878       void
01879       computeGivensRotation (const Scalar& x, 
01880            const Scalar& y, 
01881            Scalar& theCosine, 
01882            Scalar& theSine,
01883            Scalar& result) 
01884       {
01885   // _LARTG, an LAPACK aux routine, is slower but more accurate
01886   // than the BLAS' _ROTG.
01887   const bool useLartg = false;
01888 
01889   if (useLartg) {
01890     lapack_type lapack;
01891     // _LARTG doesn't clobber its input arguments x and y.
01892     lapack.LARTG (x, y, &theCosine, &theSine, &result);
01893   } else {
01894     // _ROTG clobbers its first two arguments.  x is overwritten
01895     // with the result of applying the Givens rotation: [x; y] ->
01896     // [x (on output); 0].  y is overwritten with the "fast"
01897     // Givens transform (see Golub and Van Loan, 3rd ed.).
01898     Scalar x_temp = x;
01899     Scalar y_temp = y;
01900     blas_type blas;
01901     blas.ROTG (&x_temp, &y_temp, &theCosine, &theSine);
01902     result = x_temp;
01903   }
01904       }
01905 
01907       void
01908       singularValues (const mat_type& A, 
01909           Teuchos::ArrayView<magnitude_type> sigmas) 
01910       {
01911   using Teuchos::Array;
01912   using Teuchos::ArrayView;
01913 
01914   const int numRows = A.numRows();
01915   const int numCols = A.numCols();
01916   TEUCHOS_TEST_FOR_EXCEPTION(sigmas.size() < std::min (numRows, numCols),
01917          std::invalid_argument,
01918          "The sigmas array is only of length " << sigmas.size()
01919          << ", but must be of length at least "
01920          << std::min (numRows, numCols)
01921          << " in order to hold all the singular values of the "
01922          "matrix A.");
01923 
01924   // Compute the condition number of the matrix A, using a singular
01925   // value decomposition (SVD).  LAPACK's SVD routine overwrites the
01926   // input matrix, so make a copy.
01927   mat_type A_copy (numRows, numCols);
01928   A_copy.assign (A);
01929 
01930   // Workspace query.
01931   lapack_type lapack;
01932   int info = 0;
01933   Scalar lworkScalar = STS::zero();
01934   Array<magnitude_type> rwork (std::max (std::min (numRows, numCols) - 1, 1));
01935   lapack.GESVD ('N', 'N', numRows, numCols, 
01936           A_copy.values(), A_copy.stride(), &sigmas[0], 
01937           (Scalar*) NULL, 1, (Scalar*) NULL, 1, 
01938           &lworkScalar, -1, &rwork[0], &info);
01939 
01940   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01941          "LAPACK _GESVD workspace query failed with INFO = " 
01942          << info << ".");
01943   const int lwork = static_cast<int> (STS::real (lworkScalar));
01944   TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error,
01945          "LAPACK _GESVD workspace query returned LWORK = " 
01946          << lwork << " < 0.");
01947   // Make sure that the workspace array always has positive
01948   // length, so that &work[0] makes sense.
01949   Teuchos::Array<Scalar> work (std::max (1, lwork));
01950 
01951   // Compute the singular values of A.
01952   lapack.GESVD ('N', 'N', numRows, numCols, 
01953           A_copy.values(), A_copy.stride(), &sigmas[0], 
01954           (Scalar*) NULL, 1, (Scalar*) NULL, 1, 
01955           &work[0], lwork, &rwork[0], &info);
01956   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
01957          "LAPACK _GESVD failed with INFO = " << info << ".");
01958       }
01959 
01967       std::pair<magnitude_type, magnitude_type>
01968       extremeSingularValues (const mat_type& A) 
01969       {
01970   using Teuchos::Array;
01971 
01972   const int numRows = A.numRows();
01973   const int numCols = A.numCols();
01974 
01975   Array<magnitude_type> sigmas (std::min (numRows, numCols));
01976   singularValues (A, sigmas);
01977   return std::make_pair (sigmas[0], sigmas[std::min(numRows, numCols) - 1]);
01978       }
01979       
01990       magnitude_type 
01991       leastSquaresConditionNumber (const mat_type& A,
01992            const mat_type& b,
01993            const magnitude_type& residualNorm) 
01994       {
01995   // Extreme singular values of A.
01996   const std::pair<magnitude_type, magnitude_type> sigmaMaxMin = 
01997     extremeSingularValues (A);
01998 
01999   // Our solvers currently assume that H has full rank.  If the
02000   // test matrix doesn't have full rank, we stop right away.
02001   TEUCHOS_TEST_FOR_EXCEPTION(sigmaMaxMin.second == STM::zero(), std::runtime_error,
02002          "The test matrix is rank deficient; LAPACK's _GESVD "
02003          "routine reports that its smallest singular value is "
02004          "zero.");
02005   // 2-norm condition number of A.  We checked above that the
02006   // denominator is nonzero.
02007   const magnitude_type A_cond = sigmaMaxMin.first / sigmaMaxMin.second;
02008 
02009   // "Theta" in the variable names below refers to the angle between
02010   // the vectors b and A*x, where x is the computed solution.  It
02011   // measures whether the residual norm is large (near ||b||) or
02012   // small (near 0).
02013   const magnitude_type sinTheta = residualNorm / b.normFrobenius();
02014 
02015   // \sin^2 \theta + \cos^2 \theta = 1.
02016   //
02017   // The range of sine is [-1,1], so squaring it won't overflow.
02018   // We still have to check whether sinTheta > 1, though.  This
02019   // is impossible in exact arithmetic, assuming that the
02020   // least-squares solver worked (b-A*0 = b and x minimizes
02021   // ||b-A*x||_2, so ||b-A*0||_2 >= ||b-A*x||_2).  However, it
02022   // might just be possible in floating-point arithmetic.  We're
02023   // just looking for an estimate, so if sinTheta > 1, we cap it
02024   // at 1.
02025   const magnitude_type cosTheta = (sinTheta > STM::one()) ? 
02026     STM::zero() : STM::squareroot (1 - sinTheta * sinTheta);
02027 
02028   // This may result in Inf, if cosTheta is zero.  That's OK; in
02029   // that case, the condition number of the (full-rank)
02030   // least-squares problem is rightfully infinite.
02031   const magnitude_type tanTheta = sinTheta / cosTheta;
02032 
02033   // Condition number for the full-rank least-squares problem.
02034   return 2 * A_cond / cosTheta + tanTheta * A_cond * A_cond;
02035       }
02036       
02038       magnitude_type
02039       leastSquaresResidualNorm (const mat_type& A,
02040         const mat_type& x,
02041         const mat_type& b) 
02042       {
02043   mat_type r (b.numRows(), b.numCols());
02044 
02045   // r := b - A*x
02046   r.assign (b);
02047   LocalDenseMatrixOps<Scalar> ops;
02048   ops.matMatMult (STS::one(), r, -STS::one(), A, x);
02049   return r.normFrobenius ();
02050       }
02051 
02056       magnitude_type
02057       solutionError (const mat_type& x_approx,
02058          const mat_type& x_exact) 
02059       {
02060   const int numRows = x_exact.numRows();
02061   const int numCols = x_exact.numCols();
02062 
02063   mat_type x_diff (numRows, numCols);
02064   for (int j = 0; j < numCols; ++j) {
02065     for (int i = 0; i < numRows; ++i) {
02066       x_diff(i,j) = x_exact(i,j) - x_approx(i,j);
02067     }
02068   }
02069   const magnitude_type scalingFactor = x_exact.normFrobenius();
02070 
02071   // If x_exact has zero norm, just use the absolute difference.
02072   return x_diff.normFrobenius() / 
02073     (scalingFactor == STM::zero() ? STM::one() : scalingFactor);
02074       }
02075 
02122       magnitude_type
02123       updateColumnGivens (const mat_type& H,
02124         mat_type& R,
02125         mat_type& y,
02126         mat_type& z,
02127         Teuchos::ArrayView<scalar_type> theCosines,
02128         Teuchos::ArrayView<scalar_type> theSines,
02129         const int curCol) 
02130       {
02131   using std::cerr;
02132   using std::endl;
02133 
02134   const int numRows = curCol + 2; // curCol is zero-based
02135   const int LDR = R.stride();
02136   const bool extraDebug = false;
02137 
02138   if (extraDebug) {
02139     cerr << "updateColumnGivens: curCol = " << curCol << endl;
02140   }
02141 
02142   // View of H( 1:curCol+1, curCol ) (in Matlab notation, if
02143   // curCol were a one-based index, as it would be in Matlab but
02144   // is not here).
02145   const mat_type H_col (Teuchos::View, H, numRows, 1, 0, curCol);
02146 
02147   // View of R( 1:curCol+1, curCol ) (again, in Matlab notation,
02148   // if curCol were a one-based index).
02149   mat_type R_col (Teuchos::View, R, numRows, 1, 0, curCol);
02150 
02151   // 1. Copy the current column from H into R, where it will be
02152   //    modified.
02153   R_col.assign (H_col);
02154 
02155   if (extraDebug) {
02156     printMatrix<Scalar> (cerr, "R_col before ", R_col);
02157   }
02158 
02159   // 2. Apply all the previous Givens rotations, if any, to the
02160   //    current column of the matrix.
02161   blas_type blas;
02162   for (int j = 0; j < curCol; ++j) {
02163     // BLAS::ROT really should take "const Scalar*" for these
02164     // arguments, but it wants a "Scalar*" instead, alas.
02165     Scalar theCosine = theCosines[j];
02166     Scalar theSine = theSines[j];
02167     
02168     if (extraDebug) {
02169       cerr << "  j = " << j << ": (cos,sin) = " 
02170      << theCosines[j] << "," << theSines[j] << endl;
02171     }
02172     blas.ROT (1, &R_col(j,0), LDR, &R_col(j+1,0), LDR,
02173         &theCosine, &theSine);
02174   }
02175   if (extraDebug && curCol > 0) {
02176     printMatrix<Scalar> (cerr, "R_col after applying previous "
02177              "Givens rotations", R_col);
02178   }
02179 
02180   // 3. Calculate new Givens rotation for R(curCol, curCol),
02181   //    R(curCol+1, curCol).
02182   Scalar theCosine, theSine, result;
02183   computeGivensRotation (R_col(curCol,0), R_col(curCol+1,0), 
02184              theCosine, theSine, result);
02185   theCosines[curCol] = theCosine;
02186   theSines[curCol] = theSine;
02187 
02188   if (extraDebug) {
02189     cerr << "  New cos,sin = " << theCosine << "," << theSine << endl;
02190   }
02191 
02192   // 4. _Apply_ the new Givens rotation.  We don't need to
02193   //    invoke _ROT here, because computeGivensRotation()
02194   //    already gives us the result: [x; y] -> [result; 0].
02195   R_col(curCol, 0) = result;
02196   R_col(curCol+1, 0) = STS::zero();
02197 
02198   if (extraDebug) {
02199     printMatrix<Scalar> (cerr, "R_col after applying current "
02200              "Givens rotation", R_col);
02201   }
02202 
02203   // 5. Apply the resulting Givens rotation to z (the right-hand
02204   //    side of the projected least-squares problem).
02205   //
02206   // We prefer overgeneralization to undergeneralization by assuming
02207   // here that z may have more than one column.
02208   const int LDZ = z.stride();
02209   blas.ROT (z.numCols(), 
02210       &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02211       &theCosine, &theSine);
02212 
02213   if (extraDebug) {
02214     //mat_type R_after (Teuchos::View, R, numRows, numRows-1);
02215     //printMatrix<Scalar> (cerr, "R_after", R_after);
02216     //mat_type z_after (Teuchos::View, z, numRows, z.numCols());
02217     printMatrix<Scalar> (cerr, "z_after", z);
02218   }
02219 
02220   // The last entry of z is the nonzero part of the residual of the
02221   // least-squares problem.  Its magnitude gives the residual 2-norm
02222   // of the least-squares problem.
02223   return STS::magnitude( z(numRows-1, 0) );
02224       }
02225 
02259       magnitude_type
02260       solveLapack (const mat_type& H, 
02261        mat_type& R, 
02262        mat_type& y, 
02263        mat_type& z,
02264        const int curCol) 
02265       {
02266   const int numRows = curCol + 2;
02267   const int numCols = curCol + 1;
02268   const int LDR = R.stride();
02269 
02270   // Copy H( 1:curCol+1, 1:curCol ) into R( 1:curCol+1, 1:curCol ).
02271   const mat_type H_view (Teuchos::View, H, numRows, numCols);
02272   mat_type R_view (Teuchos::View, R, numRows, numCols);
02273   R_view.assign (H_view);
02274 
02275   // The LAPACK least-squares solver overwrites the right-hand side
02276   // vector with the solution, so first copy z into y.
02277   mat_type y_view (Teuchos::View, y, numRows, y.numCols());
02278   mat_type z_view (Teuchos::View, z, numRows, y.numCols());
02279   y_view.assign (z_view);
02280 
02281   // Workspace query for the least-squares routine.
02282   int info = 0;
02283   Scalar lworkScalar = STS::zero();
02284   lapack_type lapack;
02285   lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02286          NULL, LDR, NULL, y_view.stride(), 
02287          &lworkScalar, -1, &info);
02288   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02289          "LAPACK _GELS workspace query failed with INFO = " 
02290          << info << ", for a " << numRows << " x " << numCols
02291          << " matrix with " << y_view.numCols() 
02292          << " right hand side"
02293          << ((y_view.numCols() != 1) ? "s" : "") << ".");
02294   TEUCHOS_TEST_FOR_EXCEPTION(STS::real(lworkScalar) < STM::zero(),
02295          std::logic_error,
02296          "LAPACK _GELS workspace query returned an LWORK with "
02297          "negative real part: LWORK = " << lworkScalar 
02298          << ".  That should never happen.  Please report this "
02299          "to the Belos developers.");
02300   TEUCHOS_TEST_FOR_EXCEPTION(STS::isComplex && STS::imag(lworkScalar) != STM::zero(),
02301          std::logic_error,
02302          "LAPACK _GELS workspace query returned an LWORK with "
02303          "nonzero imaginary part: LWORK = " << lworkScalar 
02304          << ".  That should never happen.  Please report this "
02305          "to the Belos developers.");
02306   // Cast workspace from Scalar to int.  Scalar may be complex,
02307   // hence the request for the real part.  Don't ask for the
02308   // magnitude, since computing the magnitude may overflow due
02309   // to squaring and square root to int.  Hopefully LAPACK
02310   // doesn't ever overflow int this way.
02311   const int lwork = std::max (1, static_cast<int> (STS::real (lworkScalar)));
02312 
02313   // Allocate workspace for solving the least-squares problem.
02314   Teuchos::Array<Scalar> work (lwork);
02315 
02316   // Solve the least-squares problem.  The ?: operator prevents
02317   // accessing the first element of the work array, if it has
02318   // length zero.
02319   lapack.GELS ('N', numRows, numCols, y_view.numCols(),
02320          R_view.values(), R_view.stride(),
02321          y_view.values(), y_view.stride(),
02322          (lwork > 0 ? &work[0] : (Scalar*) NULL), 
02323          lwork, &info);
02324 
02325   TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error,
02326          "Solving projected least-squares problem with LAPACK "
02327          "_GELS failed with INFO = " << info << ", for a " 
02328          << numRows << " x " << numCols << " matrix with " 
02329          << y_view.numCols() << " right hand side"
02330          << (y_view.numCols() != 1 ? "s" : "") << ".");
02331   // Extract the projected least-squares problem's residual error.
02332   // It's the magnitude of the last entry of y_view on output from
02333   // LAPACK's least-squares solver.  
02334   return STS::magnitude( y_view(numRows-1, 0) );  
02335       }
02336 
02347       magnitude_type
02348       updateColumnsGivens (const mat_type& H,
02349          mat_type& R,
02350          mat_type& y,
02351          mat_type& z,
02352          Teuchos::ArrayView<scalar_type> theCosines,
02353          Teuchos::ArrayView<scalar_type> theSines,
02354          const int startCol,
02355          const int endCol) 
02356       {
02357   TEUCHOS_TEST_FOR_EXCEPTION(startCol > endCol, std::invalid_argument,
02358          "updateColumnGivens: startCol = " << startCol 
02359          << " > endCol = " << endCol << ".");
02360   magnitude_type lastResult = STM::zero();
02361   // [startCol, endCol] is an inclusive range.
02362   for (int curCol = startCol; curCol <= endCol; ++curCol) {
02363     lastResult = updateColumnGivens (H, R, y, z, theCosines, theSines, curCol);
02364   }
02365   return lastResult;
02366       }
02367 
02380       magnitude_type
02381       updateColumnsGivensBlock (const mat_type& H,
02382         mat_type& R,
02383         mat_type& y,
02384         mat_type& z,
02385         Teuchos::ArrayView<scalar_type> theCosines,
02386         Teuchos::ArrayView<scalar_type> theSines,
02387         const int startCol,
02388         const int endCol) 
02389       {
02390   const int numRows = endCol + 2;
02391   const int numColsToUpdate = endCol - startCol + 1;
02392   const int LDR = R.stride();
02393 
02394   // 1. Copy columns [startCol, endCol] from H into R, where they
02395   //    will be modified.
02396   {
02397     const mat_type H_view (Teuchos::View, H, numRows, numColsToUpdate, 0, startCol);
02398     mat_type R_view (Teuchos::View, R, numRows, numColsToUpdate, 0, startCol);
02399     R_view.assign (H_view);
02400   }
02401 
02402   // 2. Apply all the previous Givens rotations, if any, to
02403   //    columns [startCol, endCol] of the matrix.  (Remember
02404   //    that we're using a left-looking QR factorization
02405   //    approach; we haven't yet touched those columns.)
02406   blas_type blas;
02407   for (int j = 0; j < startCol; ++j) {
02408     blas.ROT (numColsToUpdate,
02409         &R(j, startCol), LDR, &R(j+1, startCol), LDR, 
02410         &theCosines[j], &theSines[j]);
02411   }
02412 
02413   // 3. Update each column in turn of columns [startCol, endCol].
02414   for (int curCol = startCol; curCol < endCol; ++curCol) {
02415     // a. Apply the Givens rotations computed in previous
02416     //    iterations of this loop to the current column of R.
02417     for (int j = startCol; j < curCol; ++j) {
02418       blas.ROT (1, &R(j, curCol), LDR, &R(j+1, curCol), LDR, 
02419           &theCosines[j], &theSines[j]);
02420     }
02421     // b. Calculate new Givens rotation for R(curCol, curCol),
02422     //    R(curCol+1, curCol).
02423     Scalar theCosine, theSine, result;
02424     computeGivensRotation (R(curCol, curCol), R(curCol+1, curCol), 
02425          theCosine, theSine, result);
02426     theCosines[curCol] = theCosine;
02427     theSines[curCol] = theSine;
02428 
02429     // c. _Apply_ the new Givens rotation.  We don't need to
02430     //    invoke _ROT here, because computeGivensRotation()
02431     //    already gives us the result: [x; y] -> [result; 0].
02432     R(curCol+1, curCol) = result;
02433     R(curCol+1, curCol) = STS::zero();
02434 
02435     // d. Apply the resulting Givens rotation to z (the right-hand
02436     //    side of the projected least-squares problem).  
02437     //
02438     // We prefer overgeneralization to undergeneralization by
02439     // assuming here that z may have more than one column.
02440     const int LDZ = z.stride();
02441     blas.ROT (z.numCols(), 
02442         &z(curCol,0), LDZ, &z(curCol+1,0), LDZ,
02443         &theCosine, &theSine);
02444   }
02445 
02446   // The last entry of z is the nonzero part of the residual of the
02447   // least-squares problem.  Its magnitude gives the residual 2-norm
02448   // of the least-squares problem.
02449   return STS::magnitude( z(numRows-1, 0) );
02450       }
02451     }; // class ProjectedLeastSquaresSolver
02452   } // namespace details
02453 } // namespace Belos
02454 
02455 #endif // __Belos_ProjectedLeastSquaresSolver_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines