|
Belos Version of the Day
|
00001 //@HEADER 00002 // ************************************************************************ 00003 // 00004 // Belos: Block Linear Solvers Package 00005 // Copyright 2004 Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 //@HEADER 00041 00042 #ifndef __Belos_ProjectedLeastSquaresSolver_hpp 00043 #define __Belos_ProjectedLeastSquaresSolver_hpp 00044 00045 #include "BelosConfigDefs.hpp" 00046 #include "BelosTypes.hpp" 00047 #include "Teuchos_Array.hpp" 00048 #include "Teuchos_BLAS.hpp" 00049 #include "Teuchos_LAPACK.hpp" 00050 #include "Teuchos_oblackholestream.hpp" 00051 #include "Teuchos_ScalarTraits.hpp" 00052 #include "Teuchos_SerialDenseMatrix.hpp" 00053 #include "Teuchos_StandardParameterEntryValidators.hpp" 00054 00058 00059 namespace Belos { 00060 00069 namespace details { 00070 00071 // Anonymous namespace restricts contents to file scope. 00072 namespace { 00075 template<class Scalar> 00076 void 00077 printMatrix (std::ostream& out, 00078 const std::string& name, 00079 const Teuchos::SerialDenseMatrix<int, Scalar>& A) 00080 { 00081 using std::endl; 00082 00083 const int numRows = A.numRows(); 00084 const int numCols = A.numCols(); 00085 00086 out << name << " = " << endl << '['; 00087 if (numCols == 1) { 00088 // Compact form for column vectors; valid Matlab. 00089 for (int i = 0; i < numRows; ++i) { 00090 out << A(i,0); 00091 if (i < numRows-1) { 00092 out << "; "; 00093 } 00094 } 00095 } else { 00096 for (int i = 0; i < numRows; ++i) { 00097 for (int j = 0; j < numCols; ++j) { 00098 out << A(i,j); 00099 if (j < numCols-1) { 00100 out << ", "; 00101 } else if (i < numRows-1) { 00102 out << ';' << endl; 00103 } 00104 } 00105 } 00106 } 00107 out << ']' << endl; 00108 } 00109 00112 template<class Scalar> 00113 void 00114 print (std::ostream& out, 00115 const Teuchos::SerialDenseMatrix<int, Scalar>& A, 00116 const std::string& linePrefix) 00117 { 00118 using std::endl; 00119 00120 const int numRows = A.numRows(); 00121 const int numCols = A.numCols(); 00122 00123 out << linePrefix << '['; 00124 if (numCols == 1) { 00125 // Compact form for column vectors; valid Matlab. 00126 for (int i = 0; i < numRows; ++i) { 00127 out << A(i,0); 00128 if (i < numRows-1) { 00129 out << "; "; 00130 } 00131 } 00132 } else { 00133 for (int i = 0; i < numRows; ++i) { 00134 for (int j = 0; j < numCols; ++j) { 00135 if (numRows > 1) { 00136 out << linePrefix << " "; 00137 } 00138 out << A(i,j); 00139 if (j < numCols-1) { 00140 out << ", "; 00141 } else if (i < numRows-1) { 00142 out << ';' << endl; 00143 } 00144 } 00145 } 00146 } 00147 out << linePrefix << ']' << endl; 00148 } 00149 } // namespace (anonymous) 00150 00157 template<class Scalar> 00158 class ProjectedLeastSquaresProblem { 00159 public: 00162 typedef Scalar scalar_type; 00165 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type; 00166 00174 Teuchos::SerialDenseMatrix<int,Scalar> H; 00175 00187 Teuchos::SerialDenseMatrix<int,Scalar> R; 00188 00199 Teuchos::SerialDenseMatrix<int,Scalar> y; 00200 00209 Teuchos::SerialDenseMatrix<int,Scalar> z; 00210 00220 Teuchos::Array<Scalar> theCosines; 00221 00226 Teuchos::Array<Scalar> theSines; 00227 00236 ProjectedLeastSquaresProblem (const int maxNumIterations) : 00237 H (maxNumIterations+1, maxNumIterations), 00238 R (maxNumIterations+1, maxNumIterations), 00239 y (maxNumIterations+1, 1), 00240 z (maxNumIterations+1, 1), 00241 theCosines (maxNumIterations+1), 00242 theSines (maxNumIterations+1) 00243 {} 00244 00263 void 00264 reset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta) 00265 { 00266 typedef Teuchos::ScalarTraits<Scalar> STS; 00267 00268 // Zero out the right-hand side of the least-squares problem. 00269 z.putScalar (STS::zero()); 00270 00271 // Promote the initial residual norm from a magnitude type to 00272 // a scalar type, so we can assign it to the first entry of z. 00273 const Scalar initialResidualNorm (beta); 00274 z(0,0) = initialResidualNorm; 00275 } 00276 00290 void 00291 reallocateAndReset (const typename Teuchos::ScalarTraits<Scalar>::magnitudeType beta, 00292 const int maxNumIterations) 00293 { 00294 typedef Teuchos::ScalarTraits<Scalar> STS; 00295 typedef Teuchos::ScalarTraits<magnitude_type> STM; 00296 00297 TEUCHOS_TEST_FOR_EXCEPTION(beta < STM::zero(), std::invalid_argument, 00298 "ProjectedLeastSquaresProblem::reset: initial " 00299 "residual beta = " << beta << " < 0."); 00300 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIterations <= 0, std::invalid_argument, 00301 "ProjectedLeastSquaresProblem::reset: maximum number " 00302 "of iterations " << maxNumIterations << " <= 0."); 00303 00304 if (H.numRows() < maxNumIterations+1 || H.numCols() < maxNumIterations) { 00305 const int errcode = H.reshape (maxNumIterations+1, maxNumIterations); 00306 TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 00307 "Failed to reshape H into a " << (maxNumIterations+1) 00308 << " x " << maxNumIterations << " matrix."); 00309 } 00310 (void) H.putScalar (STS::zero()); 00311 00312 if (R.numRows() < maxNumIterations+1 || R.numCols() < maxNumIterations) { 00313 const int errcode = R.reshape (maxNumIterations+1, maxNumIterations); 00314 TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 00315 "Failed to reshape R into a " << (maxNumIterations+1) 00316 << " x " << maxNumIterations << " matrix."); 00317 } 00318 (void) R.putScalar (STS::zero()); 00319 00320 if (y.numRows() < maxNumIterations+1 || y.numCols() < 1) { 00321 const int errcode = y.reshape (maxNumIterations+1, 1); 00322 TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 00323 "Failed to reshape y into a " << (maxNumIterations+1) 00324 << " x " << 1 << " matrix."); 00325 } 00326 (void) y.putScalar (STS::zero()); 00327 00328 if (z.numRows() < maxNumIterations+1 || z.numCols() < 1) { 00329 const int errcode = z.reshape (maxNumIterations+1, 1); 00330 TEUCHOS_TEST_FOR_EXCEPTION(errcode != 0, std::runtime_error, 00331 "Failed to reshape z into a " << (maxNumIterations+1) 00332 << " x " << 1 << " matrix."); 00333 } 00334 reset (beta); 00335 } 00336 00337 }; 00338 00339 00347 template<class Scalar> 00348 class LocalDenseMatrixOps { 00349 public: 00352 typedef Scalar scalar_type; 00355 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type; 00358 typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type; 00359 00360 private: 00361 typedef Teuchos::ScalarTraits<scalar_type> STS; 00362 typedef Teuchos::ScalarTraits<magnitude_type> STM; 00363 typedef Teuchos::BLAS<int, scalar_type> blas_type; 00364 typedef Teuchos::LAPACK<int, scalar_type> lapack_type; 00365 00366 public: 00368 void 00369 conjugateTranspose (mat_type& A_star, const mat_type& A) const 00370 { 00371 for (int i = 0; i < A.numRows(); ++i) { 00372 for (int j = 0; j < A.numCols(); ++j) { 00373 A_star(j,i) = STS::conjugate (A(i,j)); 00374 } 00375 } 00376 } 00377 00379 void 00380 conjugateTransposeOfUpperTriangular (mat_type& L, const mat_type& R) const 00381 { 00382 const int N = R.numCols(); 00383 00384 for (int j = 0; j < N; ++j) { 00385 for (int i = 0; i <= j; ++i) { 00386 L(j,i) = STS::conjugate (R(i,j)); 00387 } 00388 } 00389 } 00390 00392 void 00393 zeroOutStrictLowerTriangle (mat_type& A) const 00394 { 00395 const int N = std::min (A.numRows(), A.numCols()); 00396 00397 for (int j = 0; j < N; ++j) { 00398 for (int i = j+1; i < A.numRows(); ++i) { 00399 A(i,j) = STS::zero(); 00400 } 00401 } 00402 } 00403 00412 void 00413 partition (Teuchos::RCP<mat_type>& A_11, 00414 Teuchos::RCP<mat_type>& A_21, 00415 Teuchos::RCP<mat_type>& A_12, 00416 Teuchos::RCP<mat_type>& A_22, 00417 mat_type& A, 00418 const int numRows1, 00419 const int numRows2, 00420 const int numCols1, 00421 const int numCols2) 00422 { 00423 using Teuchos::rcp; 00424 using Teuchos::View; 00425 00426 A_11 = rcp (new mat_type (View, A, numRows1, numCols1, 0, 0)); 00427 A_21 = rcp (new mat_type (View, A, numRows2, numCols1, numRows1, 0)); 00428 A_12 = rcp (new mat_type (View, A, numRows1, numCols2, 0, numCols1)); 00429 A_22 = rcp (new mat_type (View, A, numRows2, numCols2, numRows1, numCols1)); 00430 } 00431 00433 void 00434 matScale (mat_type& A, const scalar_type& alpha) const 00435 { 00436 const int LDA = A.stride(); 00437 const int numRows = A.numRows(); 00438 const int numCols = A.numCols(); 00439 00440 if (numRows == 0 || numCols == 0) { 00441 return; 00442 } else { 00443 for (int j = 0; j < numCols; ++j) { 00444 scalar_type* const A_j = &A(0,j); 00445 00446 for (int i = 0; i < numRows; ++i) { 00447 A_j[i] *= alpha; 00448 } 00449 } 00450 } 00451 } 00452 00457 void 00458 axpy (mat_type& Y, 00459 const scalar_type& alpha, 00460 const mat_type& X) const 00461 { 00462 const int numRows = Y.numRows(); 00463 const int numCols = Y.numCols(); 00464 00465 TEUCHOS_TEST_FOR_EXCEPTION(numRows != X.numRows() || numCols != X.numCols(), 00466 std::invalid_argument, "Dimensions of X and Y don't " 00467 "match. X is " << X.numRows() << " x " << X.numCols() 00468 << ", and Y is " << numRows << " x " << numCols << "."); 00469 for (int j = 0; j < numCols; ++j) { 00470 for (int i = 0; i < numRows; ++i) { 00471 Y(i,j) += alpha * X(i,j); 00472 } 00473 } 00474 } 00475 00477 void 00478 matAdd (mat_type& A, const mat_type& B) const 00479 { 00480 const int LDA = A.stride(); 00481 const int LDB = B.stride(); 00482 const int numRows = A.numRows(); 00483 const int numCols = A.numCols(); 00484 00485 TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols, 00486 std::invalid_argument, 00487 "matAdd: The input matrices A and B have " 00488 "incompatible dimensions. A is " << numRows 00489 << " x " << numCols << ", but B is " << B.numRows() 00490 << " x " << B.numCols() << "."); 00491 if (numRows == 0 || numCols == 0) { 00492 return; 00493 } else { 00494 for (int j = 0; j < numCols; ++j) { 00495 scalar_type* const A_j = &A(0,j); 00496 const scalar_type* const B_j = &B(0,j); 00497 00498 for (int i = 0; i < numRows; ++i) { 00499 A_j[i] += B_j[i]; 00500 } 00501 } 00502 } 00503 } 00504 00506 void 00507 matSub (mat_type& A, const mat_type& B) const 00508 { 00509 const int LDA = A.stride(); 00510 const int LDB = B.stride(); 00511 const int numRows = A.numRows(); 00512 const int numCols = A.numCols(); 00513 00514 TEUCHOS_TEST_FOR_EXCEPTION(B.numRows() != numRows || B.numCols() != numCols, 00515 std::invalid_argument, 00516 "matSub: The input matrices A and B have " 00517 "incompatible dimensions. A is " << numRows 00518 << " x " << numCols << ", but B is " << B.numRows() 00519 << " x " << B.numCols() << "."); 00520 if (numRows == 0 || numCols == 0) { 00521 return; 00522 } else { 00523 for (int j = 0; j < numCols; ++j) { 00524 scalar_type* const A_j = &A(0,j); 00525 const scalar_type* const B_j = &B(0,j); 00526 00527 for (int i = 0; i < numRows; ++i) { 00528 A_j[i] -= B_j[i]; 00529 } 00530 } 00531 } 00532 } 00533 00538 void 00539 rightUpperTriSolve (mat_type& B, 00540 const mat_type& R) const 00541 { 00542 TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != R.numRows(), 00543 std::invalid_argument, 00544 "rightUpperTriSolve: R and B have incompatible " 00545 "dimensions. B has " << B.numCols() << " columns, " 00546 "but R has " << R.numRows() << " rows."); 00547 blas_type blas; 00548 blas.TRSM (Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, 00549 Teuchos::NO_TRANS, Teuchos::NON_UNIT_DIAG, 00550 R.numCols(), B.numCols(), 00551 STS::one(), R.values(), R.stride(), 00552 B.values(), B.stride()); 00553 } 00554 00560 void 00561 matMatMult (const scalar_type& beta, 00562 mat_type& C, 00563 const scalar_type& alpha, 00564 const mat_type& A, 00565 const mat_type& B) const 00566 { 00567 using Teuchos::NO_TRANS; 00568 00569 TEUCHOS_TEST_FOR_EXCEPTION(A.numCols() != B.numRows(), 00570 std::invalid_argument, 00571 "matMatMult: The input matrices A and B have " 00572 "incompatible dimensions. A is " << A.numRows() 00573 << " x " << A.numCols() << ", but B is " 00574 << B.numRows() << " x " << B.numCols() << "."); 00575 TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != C.numRows(), 00576 std::invalid_argument, 00577 "matMatMult: The input matrix A and the output " 00578 "matrix C have incompatible dimensions. A has " 00579 << A.numRows() << " rows, but C has " << C.numRows() 00580 << " rows."); 00581 TEUCHOS_TEST_FOR_EXCEPTION(B.numCols() != C.numCols(), 00582 std::invalid_argument, 00583 "matMatMult: The input matrix B and the output " 00584 "matrix C have incompatible dimensions. B has " 00585 << B.numCols() << " columns, but C has " 00586 << C.numCols() << " columns."); 00587 blas_type blas; 00588 blas.GEMM (NO_TRANS, NO_TRANS, C.numRows(), C.numCols(), A.numCols(), 00589 alpha, A.values(), A.stride(), B.values(), B.stride(), 00590 beta, C.values(), C.stride()); 00591 } 00592 00600 int 00601 infNaNCount (const mat_type& A, const bool upperTriangular=false) const 00602 { 00603 int count = 0; 00604 for (int j = 0; j < A.numCols(); ++j) { 00605 if (upperTriangular) { 00606 for (int i = 0; i <= j && i < A.numRows(); ++i) { 00607 if (STS::isnaninf (A(i,j))) { 00608 ++count; 00609 } 00610 } 00611 } else { 00612 for (int i = 0; i < A.numRows(); ++i) { 00613 if (STS::isnaninf (A(i,j))) { 00614 ++count; 00615 } 00616 } 00617 } 00618 } 00619 return count; 00620 } 00621 00627 std::pair<bool, std::pair<magnitude_type, magnitude_type> > 00628 isUpperTriangular (const mat_type& A) const 00629 { 00630 magnitude_type lowerTri = STM::zero(); 00631 magnitude_type upperTri = STM::zero(); 00632 int count = 0; 00633 00634 for (int j = 0; j < A.numCols(); ++j) { 00635 // Compute the Frobenius norm of the upper triangle / 00636 // trapezoid of A. The second clause of the loop upper 00637 // bound is for matrices with fewer rows than columns. 00638 for (int i = 0; i <= j && i < A.numRows(); ++i) { 00639 const magnitude_type A_ij_mag = STS::magnitude (A(i,j)); 00640 upperTri += A_ij_mag * A_ij_mag; 00641 } 00642 // Scan the strict lower triangle / trapezoid of A. 00643 for (int i = j+1; i < A.numRows(); ++i) { 00644 const magnitude_type A_ij_mag = STS::magnitude (A(i,j)); 00645 lowerTri += A_ij_mag * A_ij_mag; 00646 if (A_ij_mag != STM::zero()) { 00647 ++count; 00648 } 00649 } 00650 } 00651 return std::make_pair (count == 0, std::make_pair (lowerTri, upperTri)); 00652 } 00653 00654 00660 std::pair<bool, std::pair<magnitude_type, magnitude_type> > 00661 isUpperHessenberg (const mat_type& A) const 00662 { 00663 magnitude_type lower = STM::zero(); 00664 magnitude_type upper = STM::zero(); 00665 int count = 0; 00666 00667 for (int j = 0; j < A.numCols(); ++j) { 00668 // Compute the Frobenius norm of the upper Hessenberg part 00669 // of A. The second clause of the loop upper bound is for 00670 // matrices with fewer rows than columns. 00671 for (int i = 0; i <= j+1 && i < A.numRows(); ++i) { 00672 const magnitude_type A_ij_mag = STS::magnitude (A(i,j)); 00673 upper += A_ij_mag * A_ij_mag; 00674 } 00675 // Scan the strict lower part of A. 00676 for (int i = j+2; i < A.numRows(); ++i) { 00677 const magnitude_type A_ij_mag = STS::magnitude (A(i,j)); 00678 lower += A_ij_mag * A_ij_mag; 00679 if (A_ij_mag != STM::zero()) { 00680 ++count; 00681 } 00682 } 00683 } 00684 return std::make_pair (count == 0, std::make_pair (lower, upper)); 00685 } 00686 00693 void 00694 ensureUpperTriangular (const mat_type& A, 00695 const char* const matrixName) const 00696 { 00697 std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 00698 isUpperTriangular (A); 00699 00700 TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument, 00701 "The " << A.numRows() << " x " << A.numCols() 00702 << " matrix " << matrixName << " is not upper " 00703 "triangular. ||tril(A)||_F = " 00704 << result.second.first << " and ||A||_F = " 00705 << result.second.second << "."); 00706 } 00707 00714 void 00715 ensureUpperHessenberg (const mat_type& A, 00716 const char* const matrixName) const 00717 { 00718 std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 00719 isUpperHessenberg (A); 00720 00721 TEUCHOS_TEST_FOR_EXCEPTION(! result.first, std::invalid_argument, 00722 "The " << A.numRows() << " x " << A.numCols() 00723 << " matrix " << matrixName << " is not upper " 00724 "triangular. ||tril(A(2:end, :))||_F = " 00725 << result.second.first << " and ||A||_F = " 00726 << result.second.second << "."); 00727 } 00728 00744 void 00745 ensureUpperHessenberg (const mat_type& A, 00746 const char* const matrixName, 00747 const magnitude_type relativeTolerance) const 00748 { 00749 std::pair<bool, std::pair<magnitude_type, magnitude_type> > result = 00750 isUpperHessenberg (A); 00751 00752 if (result.first) { 00753 // Mollified relative departure from upper Hessenberg. 00754 const magnitude_type err = (result.second.second == STM::zero() ? 00755 result.second.first : 00756 result.second.first / result.second.second); 00757 TEUCHOS_TEST_FOR_EXCEPTION(err > relativeTolerance, std::invalid_argument, 00758 "The " << A.numRows() << " x " << A.numCols() 00759 << " matrix " << matrixName << " is not upper " 00760 "triangular. ||tril(A(2:end, :))||_F " 00761 << (result.second.second == STM::zero() ? "" : " / ||A||_F") 00762 << " = " << err << " > " << relativeTolerance << "."); 00763 } 00764 } 00765 00777 void 00778 ensureMinimumDimensions (const mat_type& A, 00779 const char* const matrixName, 00780 const int minNumRows, 00781 const int minNumCols) const 00782 { 00783 TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() < minNumRows || A.numCols() < minNumCols, 00784 std::invalid_argument, 00785 "The matrix " << matrixName << " is " << A.numRows() 00786 << " x " << A.numCols() << ", and therefore does not " 00787 "satisfy the minimum dimensions " << minNumRows 00788 << " x " << minNumCols << "."); 00789 } 00790 00802 void 00803 ensureEqualDimensions (const mat_type& A, 00804 const char* const matrixName, 00805 const int numRows, 00806 const int numCols) const 00807 { 00808 TEUCHOS_TEST_FOR_EXCEPTION(A.numRows() != numRows || A.numCols() != numCols, 00809 std::invalid_argument, 00810 "The matrix " << matrixName << " is supposed to be " 00811 << numRows << " x " << numCols << ", but is " 00812 << A.numRows() << " x " << A.numCols() << " instead."); 00813 } 00814 00815 }; 00816 00835 enum ERobustness { 00836 ROBUSTNESS_NONE, 00837 ROBUSTNESS_SOME, 00838 ROBUSTNESS_LOTS, 00839 ROBUSTNESS_INVALID 00840 }; 00841 00843 std::string 00844 robustnessEnumToString (const ERobustness x) 00845 { 00846 const char* strings[] = {"None", "Some", "Lots"}; 00847 TEUCHOS_TEST_FOR_EXCEPTION(x < ROBUSTNESS_NONE || x >= ROBUSTNESS_INVALID, 00848 std::invalid_argument, 00849 "Invalid enum value " << x << "."); 00850 return std::string (strings[x]); 00851 } 00852 00854 ERobustness 00855 robustnessStringToEnum (const std::string& x) 00856 { 00857 const char* strings[] = {"None", "Some", "Lots"}; 00858 for (int r = 0; r < static_cast<int> (ROBUSTNESS_INVALID); ++r) { 00859 if (x == strings[r]) { 00860 return static_cast<ERobustness> (r); 00861 } 00862 } 00863 TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, 00864 "Invalid robustness string " << x << "."); 00865 } 00866 00877 Teuchos::RCP<Teuchos::ParameterEntryValidator> 00878 robustnessValidator () 00879 { 00880 using Teuchos::stringToIntegralParameterEntryValidator; 00881 00882 Teuchos::Array<std::string> strs (3); 00883 strs[0] = robustnessEnumToString (ROBUSTNESS_NONE); 00884 strs[1] = robustnessEnumToString (ROBUSTNESS_SOME); 00885 strs[2] = robustnessEnumToString (ROBUSTNESS_LOTS); 00886 Teuchos::Array<std::string> docs (3); 00887 docs[0] = "Use the BLAS' triangular solve. This may result in Inf or " 00888 "NaN output if the triangular matrix is rank deficient."; 00889 docs[1] = "Robustness somewhere between \"None\" and \"Lots\"."; 00890 docs[2] = "Solve the triangular system in a least-squares sense, using " 00891 "an SVD-based algorithm. This will always succeed, though the " 00892 "solution may not make sense for GMRES."; 00893 Teuchos::Array<ERobustness> ints (3); 00894 ints[0] = ROBUSTNESS_NONE; 00895 ints[1] = ROBUSTNESS_SOME; 00896 ints[2] = ROBUSTNESS_LOTS; 00897 const std::string pname ("Robustness of Projected Least-Squares Solve"); 00898 00899 return stringToIntegralParameterEntryValidator<ERobustness> (strs, docs, 00900 ints, pname); 00901 } 00902 00965 template<class Scalar> 00966 class ProjectedLeastSquaresSolver { 00967 public: 00974 typedef Scalar scalar_type; 00979 typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitude_type; 00982 typedef Teuchos::SerialDenseMatrix<int,Scalar> mat_type; 00983 00984 private: 00985 typedef Teuchos::ScalarTraits<scalar_type> STS; 00986 typedef Teuchos::ScalarTraits<magnitude_type> STM; 00987 typedef Teuchos::BLAS<int, scalar_type> blas_type; 00988 typedef Teuchos::LAPACK<int, scalar_type> lapack_type; 00989 00990 public: 01004 ProjectedLeastSquaresSolver (std::ostream& warnStream, 01005 const ERobustness defaultRobustness=ROBUSTNESS_NONE) : 01006 warn_ (warnStream), 01007 defaultRobustness_ (defaultRobustness) 01008 {} 01009 01023 magnitude_type 01024 updateColumn (ProjectedLeastSquaresProblem<Scalar>& problem, 01025 const int curCol) 01026 { 01027 return updateColumnGivens (problem.H, problem.R, problem.y, problem.z, 01028 problem.theCosines, problem.theSines, curCol); 01029 } 01030 01045 magnitude_type 01046 updateColumns (ProjectedLeastSquaresProblem<Scalar>& problem, 01047 const int startCol, 01048 const int endCol) 01049 { 01050 return updateColumnsGivens (problem.H, problem.R, problem.y, problem.z, 01051 problem.theCosines, problem.theSines, 01052 startCol, endCol); 01053 } 01054 01067 void 01068 solve (ProjectedLeastSquaresProblem<Scalar>& problem, 01069 const int curCol) 01070 { 01071 solveGivens (problem.y, problem.R, problem.z, curCol); 01072 } 01073 01078 std::pair<int, bool> 01079 solveUpperTriangularSystem (Teuchos::ESide side, 01080 mat_type& X, 01081 const mat_type& R, 01082 const mat_type& B) 01083 { 01084 return solveUpperTriangularSystem (side, X, R, B, defaultRobustness_); 01085 } 01086 01125 std::pair<int, bool> 01126 solveUpperTriangularSystem (Teuchos::ESide side, 01127 mat_type& X, 01128 const mat_type& R, 01129 const mat_type& B, 01130 const ERobustness robustness) 01131 { 01132 TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() != B.numRows(), std::invalid_argument, 01133 "The output X and right-hand side B have different " 01134 "numbers of rows. X has " << X.numRows() << " rows" 01135 ", and B has " << B.numRows() << " rows."); 01136 // If B has more columns than X, we ignore the remaining 01137 // columns of B when solving the upper triangular system. If 01138 // B has _fewer_ columns than X, we can't solve for all the 01139 // columns of X, so we throw an exception. 01140 TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() > B.numCols(), std::invalid_argument, 01141 "The output X has more columns than the " 01142 "right-hand side B. X has " << X.numCols() 01143 << " columns and B has " << B.numCols() 01144 << " columns."); 01145 // See above explaining the number of columns in B_view. 01146 mat_type B_view (Teuchos::View, B, B.numRows(), X.numCols()); 01147 01148 // Both the BLAS' _TRSM and LAPACK's _LATRS overwrite the 01149 // right-hand side with the solution, so first copy B_view 01150 // into X. 01151 X.assign (B_view); 01152 01153 // Solve the upper triangular system. 01154 return solveUpperTriangularSystemInPlace (side, X, R, robustness); 01155 } 01156 01161 std::pair<int, bool> 01162 solveUpperTriangularSystemInPlace (Teuchos::ESide side, 01163 mat_type& X, 01164 const mat_type& R) 01165 { 01166 return solveUpperTriangularSystemInPlace (side, X, R, defaultRobustness_); 01167 } 01168 01176 std::pair<int, bool> 01177 solveUpperTriangularSystemInPlace (Teuchos::ESide side, 01178 mat_type& X, 01179 const mat_type& R, 01180 const ERobustness robustness) 01181 { 01182 using Teuchos::Array; 01183 using Teuchos::Copy; 01184 using Teuchos::LEFT_SIDE; 01185 using Teuchos::RIGHT_SIDE; 01186 LocalDenseMatrixOps<Scalar> ops; 01187 01188 const int M = R.numRows(); 01189 const int N = R.numCols(); 01190 TEUCHOS_TEST_FOR_EXCEPTION(M < N, std::invalid_argument, 01191 "The input matrix R has fewer columns than rows. " 01192 "R is " << M << " x " << N << "."); 01193 // Ignore any additional rows of R by working with a square view. 01194 mat_type R_view (Teuchos::View, R, N, N); 01195 01196 if (side == LEFT_SIDE) { 01197 TEUCHOS_TEST_FOR_EXCEPTION(X.numRows() < N, std::invalid_argument, 01198 "The input/output matrix X has only " 01199 << X.numRows() << " rows, but needs at least " 01200 << N << " rows to match the matrix for a " 01201 "left-side solve R \\ X."); 01202 } else if (side == RIGHT_SIDE) { 01203 TEUCHOS_TEST_FOR_EXCEPTION(X.numCols() < N, std::invalid_argument, 01204 "The input/output matrix X has only " 01205 << X.numCols() << " columns, but needs at least " 01206 << N << " columns to match the matrix for a " 01207 "right-side solve X / R."); 01208 } 01209 TEUCHOS_TEST_FOR_EXCEPTION(robustness < ROBUSTNESS_NONE || 01210 robustness >= ROBUSTNESS_INVALID, 01211 std::invalid_argument, 01212 "Invalid robustness value " << robustness << "."); 01213 01214 // In robust mode, scan the matrix and right-hand side(s) for 01215 // Infs and NaNs. Only look at the upper triangle of the 01216 // matrix. 01217 if (robustness > ROBUSTNESS_NONE) { 01218 int count = ops.infNaNCount (R_view, true); 01219 TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error, 01220 "There " << (count != 1 ? "are" : "is") 01221 << " " << count << " Inf or NaN entr" 01222 << (count != 1 ? "ies" : "y") 01223 << " in the upper triangle of R."); 01224 count = ops.infNaNCount (X, false); 01225 TEUCHOS_TEST_FOR_EXCEPTION(count > 0, std::runtime_error, 01226 "There " << (count != 1 ? "are" : "is") 01227 << " " << count << " Inf or NaN entr" 01228 << (count != 1 ? "ies" : "y") << " in the " 01229 "right-hand side(s) X."); 01230 } 01231 01232 // Pair of values to return from this method. 01233 int rank = N; 01234 bool foundRankDeficiency = false; 01235 01236 // Solve for X. 01237 blas_type blas; 01238 01239 if (robustness == ROBUSTNESS_NONE) { 01240 // Fast triangular solve using the BLAS' _TRSM. This does 01241 // no checking for rank deficiency. 01242 blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 01243 Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(), 01244 STS::one(), R.values(), R.stride(), 01245 X.values(), X.stride()); 01246 } else if (robustness < ROBUSTNESS_INVALID) { 01247 // Save a copy of X, since X contains the right-hand side on 01248 // input. 01249 mat_type B (Copy, X, X.numRows(), X.numCols()); 01250 01251 // Fast triangular solve using the BLAS' _TRSM. This does 01252 // no checking for rank deficiency. 01253 blas.TRSM(side, Teuchos::UPPER_TRI, Teuchos::NO_TRANS, 01254 Teuchos::NON_UNIT_DIAG, X.numRows(), X.numCols(), 01255 STS::one(), R.values(), R.stride(), 01256 X.values(), X.stride()); 01257 01258 // Check for Infs or NaNs in X. If there are any, then 01259 // assume that TRSM failed, and use a more robust algorithm. 01260 if (ops.infNaNCount (X, false) != 0) { 01261 01262 warn_ << "Upper triangular solve: Found Infs and/or NaNs in the " 01263 "solution after using the fast algorithm. Retrying using a more " 01264 "robust algorithm." << std::endl; 01265 01266 // Restore X from the copy. 01267 X.assign (B); 01268 01269 // Find the minimum-norm solution to the least-squares 01270 // problem $\min_x \|RX - B\|_2$, using the singular value 01271 // decomposition (SVD). 01272 LocalDenseMatrixOps<Scalar> ops; 01273 if (side == LEFT_SIDE) { 01274 // _GELSS overwrites its matrix input, so make a copy. 01275 mat_type R_copy (Teuchos::Copy, R_view, N, N); 01276 01277 // Zero out the lower triangle of R_copy, since the 01278 // mat_type constructor copies all the entries, not just 01279 // the upper triangle. _GELSS will read all the entries 01280 // of the input matrix. 01281 ops.zeroOutStrictLowerTriangle (R_copy); 01282 01283 // Solve the least-squares problem. 01284 rank = solveLeastSquaresUsingSVD (R_copy, X); 01285 } else { 01286 // If solving with R on the right-hand side, the interface 01287 // requires that instead of solving $\min \|XR - B\|_2$, 01288 // we have to solve $\min \|R^* X^* - B^*\|_2$. We 01289 // compute (conjugate) transposes in newly allocated 01290 // temporary matrices X_star resp. R_star. (B is already 01291 // in X and _GELSS overwrites its input vector X with the 01292 // solution.) 01293 mat_type X_star (X.numCols(), X.numRows()); 01294 ops.conjugateTranspose (X_star, X); 01295 mat_type R_star (N, N); // Filled with zeros automatically. 01296 ops.conjugateTransposeOfUpperTriangular (R_star, R); 01297 01298 // Solve the least-squares problem. 01299 rank = solveLeastSquaresUsingSVD (R_star, X_star); 01300 01301 // Copy the transpose of X_star back into X. 01302 ops.conjugateTranspose (X, X_star); 01303 } 01304 if (rank < N) { 01305 foundRankDeficiency = true; 01306 } 01307 } 01308 } else { 01309 TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, 01310 "Should never get here! Invalid robustness value " 01311 << robustness << ". Please report this bug to the " 01312 "Belos developers."); 01313 } 01314 return std::make_pair (rank, foundRankDeficiency); 01315 } 01316 01317 01318 public: 01327 bool 01328 testGivensRotations (std::ostream& out) 01329 { 01330 using std::endl; 01331 01332 out << "Testing Givens rotations:" << endl; 01333 Scalar x = STS::random(); 01334 Scalar y = STS::random(); 01335 out << " x = " << x << ", y = " << y << endl; 01336 01337 Scalar theCosine, theSine, result; 01338 blas_type blas; 01339 computeGivensRotation (x, y, theCosine, theSine, result); 01340 out << "-- After computing rotation:" << endl; 01341 out << "---- cos,sin = " << theCosine << "," << theSine << endl; 01342 out << "---- x = " << x << ", y = " << y 01343 << ", result = " << result << endl; 01344 01345 blas.ROT (1, &x, 1, &y, 1, &theCosine, &theSine); 01346 out << "-- After applying rotation:" << endl; 01347 out << "---- cos,sin = " << theCosine << "," << theSine << endl; 01348 out << "---- x = " << x << ", y = " << y << endl; 01349 01350 // Allow only a tiny bit of wiggle room for zeroing-out of y. 01351 if (STS::magnitude(y) > 2*STS::eps()) 01352 return false; 01353 else 01354 return true; 01355 } 01356 01386 bool 01387 testUpdateColumn (std::ostream& out, 01388 const int numCols, 01389 const bool testBlockGivens=false, 01390 const bool extraVerbose=false) 01391 { 01392 using Teuchos::Array; 01393 using std::endl; 01394 01395 TEUCHOS_TEST_FOR_EXCEPTION(numCols <= 0, std::invalid_argument, 01396 "numCols = " << numCols << " <= 0."); 01397 const int numRows = numCols + 1; 01398 01399 mat_type H (numRows, numCols); 01400 mat_type z (numRows, 1); 01401 01402 mat_type R_givens (numRows, numCols); 01403 mat_type y_givens (numRows, 1); 01404 mat_type z_givens (numRows, 1); 01405 Array<Scalar> theCosines (numCols); 01406 Array<Scalar> theSines (numCols); 01407 01408 mat_type R_blockGivens (numRows, numCols); 01409 mat_type y_blockGivens (numRows, 1); 01410 mat_type z_blockGivens (numRows, 1); 01411 Array<Scalar> blockCosines (numCols); 01412 Array<Scalar> blockSines (numCols); 01413 const int panelWidth = std::min (3, numCols); 01414 01415 mat_type R_lapack (numRows, numCols); 01416 mat_type y_lapack (numRows, 1); 01417 mat_type z_lapack (numRows, 1); 01418 01419 // Make a random least-squares problem. 01420 makeRandomProblem (H, z); 01421 if (extraVerbose) { 01422 printMatrix<Scalar> (out, "H", H); 01423 printMatrix<Scalar> (out, "z", z); 01424 } 01425 01426 // Set up the right-hand side copies for each of the methods. 01427 // Each method is free to overwrite its given right-hand side. 01428 z_givens.assign (z); 01429 if (testBlockGivens) { 01430 z_blockGivens.assign (z); 01431 } 01432 z_lapack.assign (z); 01433 01434 // 01435 // Imitate how one would update the least-squares problem in a 01436 // typical GMRES implementation, for each updating method. 01437 // 01438 // Update using Givens rotations, one at a time. 01439 magnitude_type residualNormGivens = STM::zero(); 01440 for (int curCol = 0; curCol < numCols; ++curCol) { 01441 residualNormGivens = updateColumnGivens (H, R_givens, y_givens, z_givens, 01442 theCosines, theSines, curCol); 01443 } 01444 solveGivens (y_givens, R_givens, z_givens, numCols-1); 01445 01446 // Update using the "panel left-looking" Givens approach, with 01447 // the given panel width. 01448 magnitude_type residualNormBlockGivens = STM::zero(); 01449 if (testBlockGivens) { 01450 const bool testBlocksAtATime = true; 01451 if (testBlocksAtATime) { 01452 // Blocks of columns at a time. 01453 for (int startCol = 0; startCol < numCols; startCol += panelWidth) { 01454 int endCol = std::min (startCol + panelWidth - 1, numCols - 1); 01455 residualNormBlockGivens = 01456 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens, 01457 blockCosines, blockSines, startCol, endCol); 01458 } 01459 } else { 01460 // One column at a time. This is good as a sanity check 01461 // to make sure updateColumnsGivens() with a single column 01462 // does the same thing as updateColumnGivens(). 01463 for (int startCol = 0; startCol < numCols; ++startCol) { 01464 residualNormBlockGivens = 01465 updateColumnsGivens (H, R_blockGivens, y_blockGivens, z_blockGivens, 01466 blockCosines, blockSines, startCol, startCol); 01467 } 01468 } 01469 // The panel version of Givens should compute the same 01470 // cosines and sines as the non-panel version, and should 01471 // update the right-hand side z in the same way. Thus, we 01472 // should be able to use the same triangular solver. 01473 solveGivens (y_blockGivens, R_blockGivens, z_blockGivens, numCols-1); 01474 } 01475 01476 // Solve using LAPACK's least-squares solver. 01477 const magnitude_type residualNormLapack = 01478 solveLapack (H, R_lapack, y_lapack, z_lapack, numCols-1); 01479 01480 // Compute the condition number of the least-squares problem. 01481 // This requires a residual, so use the residual from the 01482 // LAPACK method. All that the method needs for an accurate 01483 // residual norm is forward stability. 01484 const magnitude_type leastSquaresCondNum = 01485 leastSquaresConditionNumber (H, z, residualNormLapack); 01486 01487 // Compute the relative least-squares solution error for both 01488 // Givens methods. We assume that the LAPACK solution is 01489 // "exact" and compare against the Givens rotations solution. 01490 // This is taking liberties with the definition of condition 01491 // number, but it's the best we can do, since we don't know 01492 // the exact solution and don't have an extended-precision 01493 // solver. 01494 01495 // The solution lives only in y[0 .. numCols-1]. 01496 mat_type y_givens_view (Teuchos::View, y_givens, numCols, 1); 01497 mat_type y_blockGivens_view (Teuchos::View, y_blockGivens, numCols, 1); 01498 mat_type y_lapack_view (Teuchos::View, y_lapack, numCols, 1); 01499 01500 const magnitude_type givensSolutionError = 01501 solutionError (y_givens_view, y_lapack_view); 01502 const magnitude_type blockGivensSolutionError = testBlockGivens ? 01503 solutionError (y_blockGivens_view, y_lapack_view) : 01504 STM::zero(); 01505 01506 // If printing out the matrices, copy out the upper triangular 01507 // factors for printing. (Both methods are free to leave data 01508 // below the lower triangle.) 01509 if (extraVerbose) { 01510 mat_type R_factorFromGivens (numCols, numCols); 01511 mat_type R_factorFromBlockGivens (numCols, numCols); 01512 mat_type R_factorFromLapack (numCols, numCols); 01513 01514 for (int j = 0; j < numCols; ++j) { 01515 for (int i = 0; i <= j; ++i) { 01516 R_factorFromGivens(i,j) = R_givens(i,j); 01517 if (testBlockGivens) { 01518 R_factorFromBlockGivens(i,j) = R_blockGivens(i,j); 01519 } 01520 R_factorFromLapack(i,j) = R_lapack(i,j); 01521 } 01522 } 01523 01524 printMatrix<Scalar> (out, "R_givens", R_factorFromGivens); 01525 printMatrix<Scalar> (out, "y_givens", y_givens_view); 01526 printMatrix<Scalar> (out, "z_givens", z_givens); 01527 01528 if (testBlockGivens) { 01529 printMatrix<Scalar> (out, "R_blockGivens", R_factorFromBlockGivens); 01530 printMatrix<Scalar> (out, "y_blockGivens", y_blockGivens_view); 01531 printMatrix<Scalar> (out, "z_blockGivens", z_blockGivens); 01532 } 01533 01534 printMatrix<Scalar> (out, "R_lapack", R_factorFromLapack); 01535 printMatrix<Scalar> (out, "y_lapack", y_lapack_view); 01536 printMatrix<Scalar> (out, "z_lapack", z_lapack); 01537 } 01538 01539 // Compute the (Frobenius) norm of the original matrix H. 01540 const magnitude_type H_norm = H.normFrobenius(); 01541 01542 out << "||H||_F = " << H_norm << endl; 01543 01544 out << "||H y_givens - z||_2 / ||H||_F = " 01545 << leastSquaresResidualNorm (H, y_givens_view, z) / H_norm << endl; 01546 if (testBlockGivens) { 01547 out << "||H y_blockGivens - z||_2 / ||H||_F = " 01548 << leastSquaresResidualNorm (H, y_blockGivens_view, z) / H_norm << endl; 01549 } 01550 out << "||H y_lapack - z||_2 / ||H||_F = " 01551 << leastSquaresResidualNorm (H, y_lapack_view, z) / H_norm << endl; 01552 01553 out << "||y_givens - y_lapack||_2 / ||y_lapack||_2 = " 01554 << givensSolutionError << endl; 01555 if (testBlockGivens) { 01556 out << "||y_blockGivens - y_lapack||_2 / ||y_lapack||_2 = " 01557 << blockGivensSolutionError << endl; 01558 } 01559 01560 out << "Least-squares condition number = " 01561 << leastSquaresCondNum << endl; 01562 01563 // Now for the controversial part of the test: judging whether 01564 // we succeeded. This includes the problem's condition 01565 // number, which is a measure of the maximum perturbation in 01566 // the solution for which we can still say that the solution 01567 // is valid. We include a little wiggle room by including a 01568 // factor proportional to the square root of the number of 01569 // floating-point operations that influence the last entry 01570 // (the conventional Wilkinsonian heuristic), times 10 for 01571 // good measure. 01572 // 01573 // (The square root looks like it has something to do with an 01574 // average-case probabilistic argument, but doesn't really. 01575 // What's an "average problem"?) 01576 const magnitude_type wiggleFactor = 01577 10 * STM::squareroot( numRows*numCols ); 01578 const magnitude_type solutionErrorBoundFactor = 01579 wiggleFactor * leastSquaresCondNum; 01580 const magnitude_type solutionErrorBound = 01581 solutionErrorBoundFactor * STS::eps(); 01582 out << "Solution error bound: " << solutionErrorBoundFactor 01583 << " * eps = " << solutionErrorBound << endl; 01584 01585 // Remember that NaN is not greater than, not less than, and 01586 // not equal to any other number, including itself. Some 01587 // compilers will rudely optimize away the "x != x" test. 01588 if (STM::isnaninf (solutionErrorBound)) { 01589 // Hm, the solution error bound is Inf or NaN. This 01590 // probably means that the test problem was generated 01591 // incorrectly. We should return false in this case. 01592 return false; 01593 } else { // solution error bound is finite. 01594 if (STM::isnaninf (givensSolutionError)) { 01595 return false; 01596 } else if (givensSolutionError > solutionErrorBound) { 01597 return false; 01598 } else if (testBlockGivens) { 01599 if (STM::isnaninf (blockGivensSolutionError)) { 01600 return false; 01601 } else if (blockGivensSolutionError > solutionErrorBound) { 01602 return false; 01603 } else { // Givens and Block Givens tests succeeded. 01604 return true; 01605 } 01606 } else { // Not testing block Givens; Givens test succeeded. 01607 return true; 01608 } 01609 } 01610 } 01611 01627 bool 01628 testTriangularSolves (std::ostream& out, 01629 const int testProblemSize, 01630 const ERobustness robustness, 01631 const bool verbose=false) 01632 { 01633 using Teuchos::LEFT_SIDE; 01634 using Teuchos::RIGHT_SIDE; 01635 using std::endl; 01636 typedef Teuchos::SerialDenseMatrix<int, scalar_type> mat_type; 01637 01638 Teuchos::oblackholestream blackHole; 01639 std::ostream& verboseOut = verbose ? out : blackHole; 01640 01641 verboseOut << "Testing upper triangular solves" << endl; 01642 // 01643 // Construct an upper triangular linear system to solve. 01644 // 01645 verboseOut << "-- Generating test matrix" << endl; 01646 const int N = testProblemSize; 01647 mat_type R (N, N); 01648 // Fill the upper triangle of R with random numbers. 01649 for (int j = 0; j < N; ++j) { 01650 for (int i = 0; i <= j; ++i) { 01651 R(i,j) = STS::random (); 01652 } 01653 } 01654 // Compute the Frobenius norm of R for later use. 01655 const magnitude_type R_norm = R.normFrobenius (); 01656 // Fill the right-hand side B with random numbers. 01657 mat_type B (N, 1); 01658 B.random (); 01659 // Compute the Frobenius norm of B for later use. 01660 const magnitude_type B_norm = B.normFrobenius (); 01661 01662 // Save a copy of the original upper triangular system. 01663 mat_type R_copy (Teuchos::Copy, R, N, N); 01664 mat_type B_copy (Teuchos::Copy, B, N, 1); 01665 01666 // Solution vector. 01667 mat_type X (N, 1); 01668 01669 // Solve RX = B. 01670 verboseOut << "-- Solving RX=B" << endl; 01671 // We're ignoring the return values for now. 01672 (void) solveUpperTriangularSystem (LEFT_SIDE, X, R, B, robustness); 01673 // Test the residual error. 01674 mat_type Resid (N, 1); 01675 Resid.assign (B_copy); 01676 Belos::details::LocalDenseMatrixOps<scalar_type> ops; 01677 ops.matMatMult (STS::one(), Resid, -STS::one(), R_copy, X); 01678 verboseOut << "---- ||R*X - B||_F = " << Resid.normFrobenius() << endl; 01679 verboseOut << "---- ||R||_F ||X||_F + ||B||_F = " 01680 << (R_norm * X.normFrobenius() + B_norm) 01681 << endl; 01682 01683 // Restore R and B. 01684 R.assign (R_copy); 01685 B.assign (B_copy); 01686 01687 // 01688 // Set up a right-side test problem: YR = B^*. 01689 // 01690 mat_type Y (1, N); 01691 mat_type B_star (1, N); 01692 ops.conjugateTranspose (B_star, B); 01693 mat_type B_star_copy (1, N); 01694 B_star_copy.assign (B_star); 01695 // Solve YR = B^*. 01696 verboseOut << "-- Solving YR=B^*" << endl; 01697 // We're ignoring the return values for now. 01698 (void) solveUpperTriangularSystem (RIGHT_SIDE, Y, R, B_star, robustness); 01699 // Test the residual error. 01700 mat_type Resid2 (1, N); 01701 Resid2.assign (B_star_copy); 01702 ops.matMatMult (STS::one(), Resid2, -STS::one(), Y, R_copy); 01703 verboseOut << "---- ||Y*R - B^*||_F = " << Resid2.normFrobenius() << endl; 01704 verboseOut << "---- ||Y||_F ||R||_F + ||B^*||_F = " 01705 << (Y.normFrobenius() * R_norm + B_norm) 01706 << endl; 01707 01708 // FIXME (mfh 14 Oct 2011) The test always "passes" for now; 01709 // you have to inspect the output in order to see whether it 01710 // succeeded. We really should fix the above to use the 01711 // infinity-norm bounds in Higham's book for triangular 01712 // solves. That would automate the test. 01713 return true; 01714 } 01715 01716 private: 01718 std::ostream& warn_; 01719 01724 ERobustness defaultRobustness_; 01725 01726 private: 01738 int 01739 solveLeastSquaresUsingSVD (mat_type& A, mat_type& X) 01740 { 01741 using Teuchos::Array; 01742 LocalDenseMatrixOps<Scalar> ops; 01743 01744 if (defaultRobustness_ > ROBUSTNESS_SOME) { 01745 int count = ops.infNaNCount (A); 01746 TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument, 01747 "solveLeastSquaresUsingSVD: The input matrix A " 01748 "contains " << count << "Inf and/or NaN entr" 01749 << (count != 1 ? "ies" : "y") << "."); 01750 count = ops.infNaNCount (X); 01751 TEUCHOS_TEST_FOR_EXCEPTION(count != 0, std::invalid_argument, 01752 "solveLeastSquaresUsingSVD: The input matrix X " 01753 "contains " << count << "Inf and/or NaN entr" 01754 << (count != 1 ? "ies" : "y") << "."); 01755 } 01756 const int N = std::min (A.numRows(), A.numCols()); 01757 lapack_type lapack; 01758 01759 // Rank of A; to be computed by _GELSS and returned. 01760 int rank = N; 01761 01762 // Use Scalar's machine precision for the rank tolerance, 01763 // not magnitude_type's machine precision. 01764 const magnitude_type rankTolerance = STS::eps(); 01765 01766 // Array of singular values. 01767 Array<magnitude_type> singularValues (N); 01768 01769 // Extra workspace. This is only used by _GELSS if Scalar is 01770 // complex. Teuchos::LAPACK presents a unified interface to 01771 // _GELSS that always includes the RWORK argument, even though 01772 // LAPACK's SGELSS and DGELSS don't have the RWORK argument. 01773 // We always allocate at least one entry so that &rwork[0] 01774 // makes sense. 01775 Array<magnitude_type> rwork (1); 01776 if (STS::isComplex) { 01777 rwork.resize (std::max (1, 5 * N)); 01778 } 01779 // 01780 // Workspace query 01781 // 01782 Scalar lworkScalar = STS::one(); // To be set by workspace query 01783 int info = 0; 01784 lapack.GELSS (A.numRows(), A.numCols(), X.numCols(), 01785 A.values(), A.stride(), X.values(), X.stride(), 01786 &singularValues[0], rankTolerance, &rank, 01787 &lworkScalar, -1, &rwork[0], &info); 01788 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 01789 "_GELSS workspace query returned INFO = " 01790 << info << " != 0."); 01791 const int lwork = static_cast<int> (STS::real (lworkScalar)); 01792 TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error, 01793 "_GELSS workspace query returned LWORK = " 01794 << lwork << " < 0."); 01795 // Allocate workspace. Size > 0 means &work[0] makes sense. 01796 Array<Scalar> work (std::max (1, lwork)); 01797 // Solve the least-squares problem. 01798 lapack.GELSS (A.numRows(), A.numCols(), X.numCols(), 01799 A.values(), A.stride(), X.values(), X.stride(), 01800 &singularValues[0], rankTolerance, &rank, 01801 &work[0], lwork, &rwork[0], &info); 01802 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::runtime_error, 01803 "_GELSS returned INFO = " << info << " != 0."); 01804 return rank; 01805 } 01806 01813 void 01814 solveGivens (mat_type& y, 01815 mat_type& R, 01816 const mat_type& z, 01817 const int curCol) 01818 { 01819 const int numRows = curCol + 2; 01820 01821 // Now that we have the updated R factor of H, and the updated 01822 // right-hand side z, solve the least-squares problem by 01823 // solving the upper triangular linear system Ry=z for y. 01824 const mat_type R_view (Teuchos::View, R, numRows-1, numRows-1); 01825 const mat_type z_view (Teuchos::View, z, numRows-1, z.numCols()); 01826 mat_type y_view (Teuchos::View, y, numRows-1, y.numCols()); 01827 01828 (void) solveUpperTriangularSystem (Teuchos::LEFT_SIDE, y_view, 01829 R_view, z_view, defaultRobustness_); 01830 } 01831 01833 void 01834 makeRandomProblem (mat_type& H, mat_type& z) 01835 { 01836 // In GMRES, z always starts out with only the first entry 01837 // being nonzero. That entry always has nonnegative real part 01838 // and zero imaginary part, since it is the initial residual 01839 // norm. 01840 H.random (); 01841 // Zero out the entries below the subdiagonal of H, so that it 01842 // is upper Hessenberg. 01843 for (int j = 0; j < H.numCols(); ++j) { 01844 for (int i = j+2; i < H.numRows(); ++i) { 01845 H(i,j) = STS::zero(); 01846 } 01847 } 01848 // Initialize z, the right-hand side of the least-squares 01849 // problem. Make the first entry of z nonzero. 01850 { 01851 // It's still possible that a random number will come up 01852 // zero after 1000 trials, but unlikely. Nevertheless, it's 01853 // still important not to allow an infinite loop, for 01854 // example if the pseudorandom number generator is broken 01855 // and always returns zero. 01856 const int numTrials = 1000; 01857 magnitude_type z_init = STM::zero(); 01858 for (int trial = 0; trial < numTrials && z_init == STM::zero(); ++trial) { 01859 z_init = STM::random(); 01860 } 01861 TEUCHOS_TEST_FOR_EXCEPTION(z_init == STM::zero(), std::runtime_error, 01862 "After " << numTrials << " trial" 01863 << (numTrials != 1 ? "s" : "") 01864 << ", we were unable to generate a nonzero pseudo" 01865 "random real number. This most likely indicates a " 01866 "broken pseudorandom number generator."); 01867 const magnitude_type z_first = (z_init < 0) ? -z_init : z_init; 01868 01869 // NOTE I'm assuming here that "scalar_type = magnitude_type" 01870 // assignments make sense. 01871 z(0,0) = z_first; 01872 } 01873 } 01874 01878 void 01879 computeGivensRotation (const Scalar& x, 01880 const Scalar& y, 01881 Scalar& theCosine, 01882 Scalar& theSine, 01883 Scalar& result) 01884 { 01885 // _LARTG, an LAPACK aux routine, is slower but more accurate 01886 // than the BLAS' _ROTG. 01887 const bool useLartg = false; 01888 01889 if (useLartg) { 01890 lapack_type lapack; 01891 // _LARTG doesn't clobber its input arguments x and y. 01892 lapack.LARTG (x, y, &theCosine, &theSine, &result); 01893 } else { 01894 // _ROTG clobbers its first two arguments. x is overwritten 01895 // with the result of applying the Givens rotation: [x; y] -> 01896 // [x (on output); 0]. y is overwritten with the "fast" 01897 // Givens transform (see Golub and Van Loan, 3rd ed.). 01898 Scalar x_temp = x; 01899 Scalar y_temp = y; 01900 blas_type blas; 01901 blas.ROTG (&x_temp, &y_temp, &theCosine, &theSine); 01902 result = x_temp; 01903 } 01904 } 01905 01907 void 01908 singularValues (const mat_type& A, 01909 Teuchos::ArrayView<magnitude_type> sigmas) 01910 { 01911 using Teuchos::Array; 01912 using Teuchos::ArrayView; 01913 01914 const int numRows = A.numRows(); 01915 const int numCols = A.numCols(); 01916 TEUCHOS_TEST_FOR_EXCEPTION(sigmas.size() < std::min (numRows, numCols), 01917 std::invalid_argument, 01918 "The sigmas array is only of length " << sigmas.size() 01919 << ", but must be of length at least " 01920 << std::min (numRows, numCols) 01921 << " in order to hold all the singular values of the " 01922 "matrix A."); 01923 01924 // Compute the condition number of the matrix A, using a singular 01925 // value decomposition (SVD). LAPACK's SVD routine overwrites the 01926 // input matrix, so make a copy. 01927 mat_type A_copy (numRows, numCols); 01928 A_copy.assign (A); 01929 01930 // Workspace query. 01931 lapack_type lapack; 01932 int info = 0; 01933 Scalar lworkScalar = STS::zero(); 01934 Array<magnitude_type> rwork (std::max (std::min (numRows, numCols) - 1, 1)); 01935 lapack.GESVD ('N', 'N', numRows, numCols, 01936 A_copy.values(), A_copy.stride(), &sigmas[0], 01937 (Scalar*) NULL, 1, (Scalar*) NULL, 1, 01938 &lworkScalar, -1, &rwork[0], &info); 01939 01940 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 01941 "LAPACK _GESVD workspace query failed with INFO = " 01942 << info << "."); 01943 const int lwork = static_cast<int> (STS::real (lworkScalar)); 01944 TEUCHOS_TEST_FOR_EXCEPTION(lwork < 0, std::logic_error, 01945 "LAPACK _GESVD workspace query returned LWORK = " 01946 << lwork << " < 0."); 01947 // Make sure that the workspace array always has positive 01948 // length, so that &work[0] makes sense. 01949 Teuchos::Array<Scalar> work (std::max (1, lwork)); 01950 01951 // Compute the singular values of A. 01952 lapack.GESVD ('N', 'N', numRows, numCols, 01953 A_copy.values(), A_copy.stride(), &sigmas[0], 01954 (Scalar*) NULL, 1, (Scalar*) NULL, 1, 01955 &work[0], lwork, &rwork[0], &info); 01956 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 01957 "LAPACK _GESVD failed with INFO = " << info << "."); 01958 } 01959 01967 std::pair<magnitude_type, magnitude_type> 01968 extremeSingularValues (const mat_type& A) 01969 { 01970 using Teuchos::Array; 01971 01972 const int numRows = A.numRows(); 01973 const int numCols = A.numCols(); 01974 01975 Array<magnitude_type> sigmas (std::min (numRows, numCols)); 01976 singularValues (A, sigmas); 01977 return std::make_pair (sigmas[0], sigmas[std::min(numRows, numCols) - 1]); 01978 } 01979 01990 magnitude_type 01991 leastSquaresConditionNumber (const mat_type& A, 01992 const mat_type& b, 01993 const magnitude_type& residualNorm) 01994 { 01995 // Extreme singular values of A. 01996 const std::pair<magnitude_type, magnitude_type> sigmaMaxMin = 01997 extremeSingularValues (A); 01998 01999 // Our solvers currently assume that H has full rank. If the 02000 // test matrix doesn't have full rank, we stop right away. 02001 TEUCHOS_TEST_FOR_EXCEPTION(sigmaMaxMin.second == STM::zero(), std::runtime_error, 02002 "The test matrix is rank deficient; LAPACK's _GESVD " 02003 "routine reports that its smallest singular value is " 02004 "zero."); 02005 // 2-norm condition number of A. We checked above that the 02006 // denominator is nonzero. 02007 const magnitude_type A_cond = sigmaMaxMin.first / sigmaMaxMin.second; 02008 02009 // "Theta" in the variable names below refers to the angle between 02010 // the vectors b and A*x, where x is the computed solution. It 02011 // measures whether the residual norm is large (near ||b||) or 02012 // small (near 0). 02013 const magnitude_type sinTheta = residualNorm / b.normFrobenius(); 02014 02015 // \sin^2 \theta + \cos^2 \theta = 1. 02016 // 02017 // The range of sine is [-1,1], so squaring it won't overflow. 02018 // We still have to check whether sinTheta > 1, though. This 02019 // is impossible in exact arithmetic, assuming that the 02020 // least-squares solver worked (b-A*0 = b and x minimizes 02021 // ||b-A*x||_2, so ||b-A*0||_2 >= ||b-A*x||_2). However, it 02022 // might just be possible in floating-point arithmetic. We're 02023 // just looking for an estimate, so if sinTheta > 1, we cap it 02024 // at 1. 02025 const magnitude_type cosTheta = (sinTheta > STM::one()) ? 02026 STM::zero() : STM::squareroot (1 - sinTheta * sinTheta); 02027 02028 // This may result in Inf, if cosTheta is zero. That's OK; in 02029 // that case, the condition number of the (full-rank) 02030 // least-squares problem is rightfully infinite. 02031 const magnitude_type tanTheta = sinTheta / cosTheta; 02032 02033 // Condition number for the full-rank least-squares problem. 02034 return 2 * A_cond / cosTheta + tanTheta * A_cond * A_cond; 02035 } 02036 02038 magnitude_type 02039 leastSquaresResidualNorm (const mat_type& A, 02040 const mat_type& x, 02041 const mat_type& b) 02042 { 02043 mat_type r (b.numRows(), b.numCols()); 02044 02045 // r := b - A*x 02046 r.assign (b); 02047 LocalDenseMatrixOps<Scalar> ops; 02048 ops.matMatMult (STS::one(), r, -STS::one(), A, x); 02049 return r.normFrobenius (); 02050 } 02051 02056 magnitude_type 02057 solutionError (const mat_type& x_approx, 02058 const mat_type& x_exact) 02059 { 02060 const int numRows = x_exact.numRows(); 02061 const int numCols = x_exact.numCols(); 02062 02063 mat_type x_diff (numRows, numCols); 02064 for (int j = 0; j < numCols; ++j) { 02065 for (int i = 0; i < numRows; ++i) { 02066 x_diff(i,j) = x_exact(i,j) - x_approx(i,j); 02067 } 02068 } 02069 const magnitude_type scalingFactor = x_exact.normFrobenius(); 02070 02071 // If x_exact has zero norm, just use the absolute difference. 02072 return x_diff.normFrobenius() / 02073 (scalingFactor == STM::zero() ? STM::one() : scalingFactor); 02074 } 02075 02122 magnitude_type 02123 updateColumnGivens (const mat_type& H, 02124 mat_type& R, 02125 mat_type& y, 02126 mat_type& z, 02127 Teuchos::ArrayView<scalar_type> theCosines, 02128 Teuchos::ArrayView<scalar_type> theSines, 02129 const int curCol) 02130 { 02131 using std::cerr; 02132 using std::endl; 02133 02134 const int numRows = curCol + 2; // curCol is zero-based 02135 const int LDR = R.stride(); 02136 const bool extraDebug = false; 02137 02138 if (extraDebug) { 02139 cerr << "updateColumnGivens: curCol = " << curCol << endl; 02140 } 02141 02142 // View of H( 1:curCol+1, curCol ) (in Matlab notation, if 02143 // curCol were a one-based index, as it would be in Matlab but 02144 // is not here). 02145 const mat_type H_col (Teuchos::View, H, numRows, 1, 0, curCol); 02146 02147 // View of R( 1:curCol+1, curCol ) (again, in Matlab notation, 02148 // if curCol were a one-based index). 02149 mat_type R_col (Teuchos::View, R, numRows, 1, 0, curCol); 02150 02151 // 1. Copy the current column from H into R, where it will be 02152 // modified. 02153 R_col.assign (H_col); 02154 02155 if (extraDebug) { 02156 printMatrix<Scalar> (cerr, "R_col before ", R_col); 02157 } 02158 02159 // 2. Apply all the previous Givens rotations, if any, to the 02160 // current column of the matrix. 02161 blas_type blas; 02162 for (int j = 0; j < curCol; ++j) { 02163 // BLAS::ROT really should take "const Scalar*" for these 02164 // arguments, but it wants a "Scalar*" instead, alas. 02165 Scalar theCosine = theCosines[j]; 02166 Scalar theSine = theSines[j]; 02167 02168 if (extraDebug) { 02169 cerr << " j = " << j << ": (cos,sin) = " 02170 << theCosines[j] << "," << theSines[j] << endl; 02171 } 02172 blas.ROT (1, &R_col(j,0), LDR, &R_col(j+1,0), LDR, 02173 &theCosine, &theSine); 02174 } 02175 if (extraDebug && curCol > 0) { 02176 printMatrix<Scalar> (cerr, "R_col after applying previous " 02177 "Givens rotations", R_col); 02178 } 02179 02180 // 3. Calculate new Givens rotation for R(curCol, curCol), 02181 // R(curCol+1, curCol). 02182 Scalar theCosine, theSine, result; 02183 computeGivensRotation (R_col(curCol,0), R_col(curCol+1,0), 02184 theCosine, theSine, result); 02185 theCosines[curCol] = theCosine; 02186 theSines[curCol] = theSine; 02187 02188 if (extraDebug) { 02189 cerr << " New cos,sin = " << theCosine << "," << theSine << endl; 02190 } 02191 02192 // 4. _Apply_ the new Givens rotation. We don't need to 02193 // invoke _ROT here, because computeGivensRotation() 02194 // already gives us the result: [x; y] -> [result; 0]. 02195 R_col(curCol, 0) = result; 02196 R_col(curCol+1, 0) = STS::zero(); 02197 02198 if (extraDebug) { 02199 printMatrix<Scalar> (cerr, "R_col after applying current " 02200 "Givens rotation", R_col); 02201 } 02202 02203 // 5. Apply the resulting Givens rotation to z (the right-hand 02204 // side of the projected least-squares problem). 02205 // 02206 // We prefer overgeneralization to undergeneralization by assuming 02207 // here that z may have more than one column. 02208 const int LDZ = z.stride(); 02209 blas.ROT (z.numCols(), 02210 &z(curCol,0), LDZ, &z(curCol+1,0), LDZ, 02211 &theCosine, &theSine); 02212 02213 if (extraDebug) { 02214 //mat_type R_after (Teuchos::View, R, numRows, numRows-1); 02215 //printMatrix<Scalar> (cerr, "R_after", R_after); 02216 //mat_type z_after (Teuchos::View, z, numRows, z.numCols()); 02217 printMatrix<Scalar> (cerr, "z_after", z); 02218 } 02219 02220 // The last entry of z is the nonzero part of the residual of the 02221 // least-squares problem. Its magnitude gives the residual 2-norm 02222 // of the least-squares problem. 02223 return STS::magnitude( z(numRows-1, 0) ); 02224 } 02225 02259 magnitude_type 02260 solveLapack (const mat_type& H, 02261 mat_type& R, 02262 mat_type& y, 02263 mat_type& z, 02264 const int curCol) 02265 { 02266 const int numRows = curCol + 2; 02267 const int numCols = curCol + 1; 02268 const int LDR = R.stride(); 02269 02270 // Copy H( 1:curCol+1, 1:curCol ) into R( 1:curCol+1, 1:curCol ). 02271 const mat_type H_view (Teuchos::View, H, numRows, numCols); 02272 mat_type R_view (Teuchos::View, R, numRows, numCols); 02273 R_view.assign (H_view); 02274 02275 // The LAPACK least-squares solver overwrites the right-hand side 02276 // vector with the solution, so first copy z into y. 02277 mat_type y_view (Teuchos::View, y, numRows, y.numCols()); 02278 mat_type z_view (Teuchos::View, z, numRows, y.numCols()); 02279 y_view.assign (z_view); 02280 02281 // Workspace query for the least-squares routine. 02282 int info = 0; 02283 Scalar lworkScalar = STS::zero(); 02284 lapack_type lapack; 02285 lapack.GELS ('N', numRows, numCols, y_view.numCols(), 02286 NULL, LDR, NULL, y_view.stride(), 02287 &lworkScalar, -1, &info); 02288 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 02289 "LAPACK _GELS workspace query failed with INFO = " 02290 << info << ", for a " << numRows << " x " << numCols 02291 << " matrix with " << y_view.numCols() 02292 << " right hand side" 02293 << ((y_view.numCols() != 1) ? "s" : "") << "."); 02294 TEUCHOS_TEST_FOR_EXCEPTION(STS::real(lworkScalar) < STM::zero(), 02295 std::logic_error, 02296 "LAPACK _GELS workspace query returned an LWORK with " 02297 "negative real part: LWORK = " << lworkScalar 02298 << ". That should never happen. Please report this " 02299 "to the Belos developers."); 02300 TEUCHOS_TEST_FOR_EXCEPTION(STS::isComplex && STS::imag(lworkScalar) != STM::zero(), 02301 std::logic_error, 02302 "LAPACK _GELS workspace query returned an LWORK with " 02303 "nonzero imaginary part: LWORK = " << lworkScalar 02304 << ". That should never happen. Please report this " 02305 "to the Belos developers."); 02306 // Cast workspace from Scalar to int. Scalar may be complex, 02307 // hence the request for the real part. Don't ask for the 02308 // magnitude, since computing the magnitude may overflow due 02309 // to squaring and square root to int. Hopefully LAPACK 02310 // doesn't ever overflow int this way. 02311 const int lwork = std::max (1, static_cast<int> (STS::real (lworkScalar))); 02312 02313 // Allocate workspace for solving the least-squares problem. 02314 Teuchos::Array<Scalar> work (lwork); 02315 02316 // Solve the least-squares problem. The ?: operator prevents 02317 // accessing the first element of the work array, if it has 02318 // length zero. 02319 lapack.GELS ('N', numRows, numCols, y_view.numCols(), 02320 R_view.values(), R_view.stride(), 02321 y_view.values(), y_view.stride(), 02322 (lwork > 0 ? &work[0] : (Scalar*) NULL), 02323 lwork, &info); 02324 02325 TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, 02326 "Solving projected least-squares problem with LAPACK " 02327 "_GELS failed with INFO = " << info << ", for a " 02328 << numRows << " x " << numCols << " matrix with " 02329 << y_view.numCols() << " right hand side" 02330 << (y_view.numCols() != 1 ? "s" : "") << "."); 02331 // Extract the projected least-squares problem's residual error. 02332 // It's the magnitude of the last entry of y_view on output from 02333 // LAPACK's least-squares solver. 02334 return STS::magnitude( y_view(numRows-1, 0) ); 02335 } 02336 02347 magnitude_type 02348 updateColumnsGivens (const mat_type& H, 02349 mat_type& R, 02350 mat_type& y, 02351 mat_type& z, 02352 Teuchos::ArrayView<scalar_type> theCosines, 02353 Teuchos::ArrayView<scalar_type> theSines, 02354 const int startCol, 02355 const int endCol) 02356 { 02357 TEUCHOS_TEST_FOR_EXCEPTION(startCol > endCol, std::invalid_argument, 02358 "updateColumnGivens: startCol = " << startCol 02359 << " > endCol = " << endCol << "."); 02360 magnitude_type lastResult = STM::zero(); 02361 // [startCol, endCol] is an inclusive range. 02362 for (int curCol = startCol; curCol <= endCol; ++curCol) { 02363 lastResult = updateColumnGivens (H, R, y, z, theCosines, theSines, curCol); 02364 } 02365 return lastResult; 02366 } 02367 02380 magnitude_type 02381 updateColumnsGivensBlock (const mat_type& H, 02382 mat_type& R, 02383 mat_type& y, 02384 mat_type& z, 02385 Teuchos::ArrayView<scalar_type> theCosines, 02386 Teuchos::ArrayView<scalar_type> theSines, 02387 const int startCol, 02388 const int endCol) 02389 { 02390 const int numRows = endCol + 2; 02391 const int numColsToUpdate = endCol - startCol + 1; 02392 const int LDR = R.stride(); 02393 02394 // 1. Copy columns [startCol, endCol] from H into R, where they 02395 // will be modified. 02396 { 02397 const mat_type H_view (Teuchos::View, H, numRows, numColsToUpdate, 0, startCol); 02398 mat_type R_view (Teuchos::View, R, numRows, numColsToUpdate, 0, startCol); 02399 R_view.assign (H_view); 02400 } 02401 02402 // 2. Apply all the previous Givens rotations, if any, to 02403 // columns [startCol, endCol] of the matrix. (Remember 02404 // that we're using a left-looking QR factorization 02405 // approach; we haven't yet touched those columns.) 02406 blas_type blas; 02407 for (int j = 0; j < startCol; ++j) { 02408 blas.ROT (numColsToUpdate, 02409 &R(j, startCol), LDR, &R(j+1, startCol), LDR, 02410 &theCosines[j], &theSines[j]); 02411 } 02412 02413 // 3. Update each column in turn of columns [startCol, endCol]. 02414 for (int curCol = startCol; curCol < endCol; ++curCol) { 02415 // a. Apply the Givens rotations computed in previous 02416 // iterations of this loop to the current column of R. 02417 for (int j = startCol; j < curCol; ++j) { 02418 blas.ROT (1, &R(j, curCol), LDR, &R(j+1, curCol), LDR, 02419 &theCosines[j], &theSines[j]); 02420 } 02421 // b. Calculate new Givens rotation for R(curCol, curCol), 02422 // R(curCol+1, curCol). 02423 Scalar theCosine, theSine, result; 02424 computeGivensRotation (R(curCol, curCol), R(curCol+1, curCol), 02425 theCosine, theSine, result); 02426 theCosines[curCol] = theCosine; 02427 theSines[curCol] = theSine; 02428 02429 // c. _Apply_ the new Givens rotation. We don't need to 02430 // invoke _ROT here, because computeGivensRotation() 02431 // already gives us the result: [x; y] -> [result; 0]. 02432 R(curCol+1, curCol) = result; 02433 R(curCol+1, curCol) = STS::zero(); 02434 02435 // d. Apply the resulting Givens rotation to z (the right-hand 02436 // side of the projected least-squares problem). 02437 // 02438 // We prefer overgeneralization to undergeneralization by 02439 // assuming here that z may have more than one column. 02440 const int LDZ = z.stride(); 02441 blas.ROT (z.numCols(), 02442 &z(curCol,0), LDZ, &z(curCol+1,0), LDZ, 02443 &theCosine, &theSine); 02444 } 02445 02446 // The last entry of z is the nonzero part of the residual of the 02447 // least-squares problem. Its magnitude gives the residual 2-norm 02448 // of the least-squares problem. 02449 return STS::magnitude( z(numRows-1, 0) ); 02450 } 02451 }; // class ProjectedLeastSquaresSolver 02452 } // namespace details 02453 } // namespace Belos 02454 02455 #endif // __Belos_ProjectedLeastSquaresSolver_hpp
1.7.4