Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Tsqr_NodeTsqr.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00032 #ifndef __TSQR_Tsqr_NodeTsqr_hpp
00033 #define __TSQR_Tsqr_NodeTsqr_hpp
00034 
00035 #include <Tsqr_ApplyType.hpp>
00036 #include <Tsqr_Lapack.hpp>
00037 #include <Tsqr_Matrix.hpp>
00038 
00039 #include <Teuchos_Describable.hpp>
00040 #include <Teuchos_ScalarTraits.hpp>
00041 #include <Teuchos_TypeNameTraits.hpp>
00042 
00043 #include <vector>
00044 
00045 namespace TSQR {
00046 
00081   template<class Ordinal, class Scalar, class FactorOutputType>
00082   class NodeTsqr : public Teuchos::Describable {
00083   public:
00084     typedef Ordinal ordinal_type;
00085     typedef Scalar scalar_type;
00086     typedef FactorOutputType factor_output_type;
00087 
00089     NodeTsqr() {}
00090 
00092     virtual ~NodeTsqr() {}
00093 
00098     virtual size_t TEUCHOS_DEPRECATED cache_block_size() const = 0;
00099 
00101     virtual size_t cache_size_hint() const = 0;
00102 
00111     virtual std::string description () const {
00112       using Teuchos::TypeNameTraits;
00113       std::ostringstream os;
00114       os << "NodeTsqr<Ordinal=" << TypeNameTraits<Ordinal>::name()
00115    << ", Scalar=" << TypeNameTraits<Scalar>::name()
00116    << ", ...>: Intranode Tall Skinny QR (TSQR), with cache size hint " 
00117    << cache_size_hint();
00118       return os.str();
00119     }
00120 
00152     virtual factor_output_type
00153     factor (const Ordinal nrows,
00154       const Ordinal ncols, 
00155       Scalar A[],
00156       const Ordinal lda,
00157       Scalar R[],
00158       const Ordinal ldr,
00159       const bool contiguousCacheBlocks) const = 0;
00160 
00187     virtual void
00188     apply (const ApplyType& applyType,
00189      const Ordinal nrows,
00190      const Ordinal ncols_Q,
00191      const Scalar Q[],
00192      const Ordinal ldq,
00193      const FactorOutputType& factorOutput,
00194      const Ordinal ncols_C,
00195      Scalar C[],
00196      const Ordinal ldc,
00197      const bool contiguousCacheBlocks) const = 0;
00198 
00227     virtual void
00228     explicit_Q (const Ordinal nrows,
00229     const Ordinal ncols_Q,
00230     const Scalar Q[],
00231     const Ordinal ldq,
00232     const factor_output_type& factorOutput,
00233     const Ordinal ncols_C,
00234     Scalar C[],
00235     const Ordinal ldc,
00236     const bool contiguousCacheBlocks) const = 0;
00237 
00248     virtual void
00249     cache_block (const Ordinal nrows,
00250      const Ordinal ncols, 
00251      Scalar A_out[],
00252      const Scalar A_in[],
00253      const Ordinal lda_in) const = 0;
00254 
00271     virtual void
00272     un_cache_block (const Ordinal nrows,
00273         const Ordinal ncols,
00274         Scalar A_out[],
00275         const Ordinal lda_out,        
00276         const Scalar A_in[]) const = 0;
00277 
00282     virtual void
00283     Q_times_B (const Ordinal nrows,
00284          const Ordinal ncols,
00285          Scalar Q[],
00286          const Ordinal ldq,
00287          const Scalar B[],
00288          const Ordinal ldb,
00289          const bool contiguousCacheBlocks) const = 0;
00290 
00303     virtual void
00304     fill_with_zeros (const Ordinal nrows,
00305          const Ordinal ncols,
00306          Scalar A[],
00307          const Ordinal lda, 
00308          const bool contiguousCacheBlocks) const = 0;
00309     
00310   protected:
00311 
00328     virtual ConstMatView<Ordinal, Scalar>
00329     const_top_block (const ConstMatView<Ordinal, Scalar>& C,
00330          const bool contiguousCacheBlocks) const = 0;
00331 
00332   public:
00333 
00354     template<class MatrixViewType>
00355     MatrixViewType
00356     top_block (const MatrixViewType& C, 
00357          const bool contiguous_cache_blocks) const 
00358     {
00359       // The *_top_block() methods don't actually modify the data, so
00360       // it's safe to handle the matrix's data as const within this
00361       // method.  The only cast from const to nonconst may be in the
00362       // return value, but there it's legitimate since we're just
00363       // using the same constness as C has.
00364       ConstMatView<Ordinal, Scalar> C_view (C.nrows(), C.ncols(), 
00365               C.get(), C.lda());
00366       ConstMatView<Ordinal, Scalar> C_top = 
00367   const_top_block (C_view, contiguous_cache_blocks);
00368       TEST_FOR_EXCEPTION(C_top.nrows() < C_top.ncols(), std::logic_error,
00369        "The subclass of NodeTsqr has a bug in const_top_block"
00370        "(); it returned a block with fewer rows than columns "
00371        "(" << C_top.nrows() << " rows and " << C_top.ncols() 
00372        << " columns).  Please report this bug to the Kokkos "
00373        "developers.");
00374       typedef typename MatrixViewType::pointer_type ptr_type;
00375       return MatrixViewType (C_top.nrows(), C_top.ncols(), 
00376            const_cast<ptr_type> (C_top.get()), 
00377            C_top.lda());
00378     }
00379 
00392     virtual bool 
00393     QR_produces_R_factor_with_nonnegative_diagonal () const = 0;
00394 
00416     Ordinal
00417     reveal_R_rank (const Ordinal ncols,
00418        Scalar R[],
00419        const Ordinal ldr,
00420        Scalar U[],
00421        const Ordinal ldu,
00422        const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) const;
00423 
00435     Ordinal
00436     reveal_rank (const Ordinal nrows,
00437      const Ordinal ncols,
00438      Scalar Q[],
00439      const Ordinal ldq,
00440      Scalar R[],
00441      const Ordinal ldr,
00442      const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
00443      const bool contiguousCacheBlocks) const;
00444   };
00445 
00446 
00447   template<class Ordinal, class Scalar, class FactorOutputType>
00448   Ordinal
00449   NodeTsqr<Ordinal, Scalar, FactorOutputType>::
00450   reveal_R_rank (const Ordinal ncols,
00451      Scalar R[],
00452      const Ordinal ldr,
00453      Scalar U[],
00454      const Ordinal ldu,
00455      const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) const
00456   {
00457     using Teuchos::TypeNameTraits;
00458     typedef Teuchos::ScalarTraits<scalar_type> STS;
00459     typedef typename STS::magnitudeType magnitude_type;
00460     typedef Teuchos::ScalarTraits<magnitude_type> STM;
00461 
00462     TEST_FOR_EXCEPTION(tol < 0, std::invalid_argument,
00463            "In NodeTsqr::reveal_R_rank: numerical rank tolerance "
00464            "(tol = " << tol << ") is negative.");
00465     TEST_FOR_EXCEPTION(ncols < 0, std::invalid_argument,
00466            "In NodeTsqr::reveal_R_rank: number of columns "
00467            "(ncols = " << ncols << ") is negative.");
00468     TEST_FOR_EXCEPTION(ldr < ncols, std::invalid_argument,
00469            "In NodeTsqr::reveal_R_ank: stride of R (ldr = " 
00470            << ldr << ") is less than the number of columns "
00471            "(ncols = " << ncols << ").");
00472     TEST_FOR_EXCEPTION(ldu < ncols, std::invalid_argument,
00473            "In NodeTsqr::reveal_R_rank: stride of U (ldu = " 
00474            << ldu << ") is less than the number of columns "
00475            "(ncols = " << ncols << ")");
00476 
00477     // Zero columns always means rank zero.
00478     if (ncols == 0)
00479       return 0;
00480     //
00481     // Compute the SVD (singular value decomposition) of the R
00482     // factor, using LAPACK's GESVD routine.  We do so in a deep
00483     // copy (B) because LAPACK overwrites the input.  If the R
00484     // factor is full rank (expected to be the common case), we need
00485     // to leave it alone (so that it stays upper triangular).
00486     //
00487     LAPACK<Ordinal, Scalar> lapack;
00488     MatView<Ordinal, Scalar> R_view (ncols, ncols, R, ldr);
00489     Matrix<Ordinal, Scalar> B (R_view); // B := R (deep copy)
00490     MatView<Ordinal, Scalar> U_view (ncols, ncols, U, ldu);
00491     Matrix<Ordinal, Scalar> VT (ncols, ncols, Scalar(0));
00492 
00493     // Set up workspace for the SVD.
00494     std::vector<magnitude_type> svd_rwork (5*ncols);
00495     std::vector<magnitude_type> singular_values (ncols);
00496     Ordinal svd_lwork = -1; // -1 for LWORK query; will be changed
00497     int svd_info = 0;
00498 
00499     // LAPACK workspace ("LWORK") query for SVD.  The workspace
00500     // ("WORK") array is always of Scalar type, even in the complex
00501     // case.
00502     {
00503       // Exception messages in this scope all start with this.
00504       const char prefix[] = "In NodeTsqr::reveal_R_rank: LAPACK SVD (_GESVD) "
00505   "workspace query returned ";
00506       // std::logic_error messages in this scope all end with this.
00507       const char postfix[] = ".  Please report this bug to the Kokkos "
00508   "developers.";
00509 
00510       Scalar svd_lwork_scalar = Teuchos::ScalarTraits<Scalar>::zero();
00511       lapack.GESVD ("A", "A", ncols, ncols, B.get(), B.lda(), 
00512         &singular_values[0], U_view.get(), U_view.lda(), 
00513         VT.get(), VT.lda(), &svd_lwork_scalar, svd_lwork, 
00514         &svd_rwork[0], &svd_info);
00515       // Failure of the LAPACK workspace query is a logic error (a
00516       // bug) because we have already validated the matrix
00517       // dimensions above.
00518       TEST_FOR_EXCEPTION(svd_info != 0, std::logic_error,
00519        prefix << "a nonzero INFO = " << svd_info 
00520        << postfix);
00521       // LAPACK returns the workspace array length as a Scalar.  We
00522       // have to convert it back to an Ordinal in order to allocate
00523       // the workspace array and pass it in to LAPACK as the LWORK
00524       // argument.  Ordinal definitely must be a signed type, since
00525       // LWORK = -1 indicates a workspace query.  If Scalar is
00526       // complex, LAPACK had better return something with a zero
00527       // imaginary part, since I can't allocate imaginary amounts of
00528       // memory!  (Take the real part to avoid rounding error, since
00529       // magnitude() may be implemented using a square root always...)
00530       svd_lwork = static_cast<Ordinal> (Teuchos::ScalarTraits<Scalar>::real (svd_lwork_scalar));
00531 
00532       // LAPACK should always return an LWORK that fits in Ordinal,
00533       // but it's a good idea to check anyway.  We do so by checking
00534       // whether casting back from Ordinal to Scalar gives the same
00535       // original Scalar result.  This should work unless Scalar and
00536       // Ordinal are user-defined types with weird definitions of
00537       // the type casts.
00538       TEST_FOR_EXCEPTION(static_cast<Scalar> (svd_lwork) != svd_lwork_scalar,
00539        std::logic_error,
00540        prefix << "a workspace array length (LWORK) of type "
00541        "Scalar=" << TypeNameTraits<Scalar>::name() 
00542        << " that does not fit in an Ordinal=" 
00543        << TypeNameTraits<Ordinal>::name() << " type.  "
00544        "As a Scalar, LWORK=" << svd_lwork_scalar 
00545        << ", but cast to Ordinal, LWORK=" << svd_lwork 
00546        << postfix);
00547       // Make sure svd_lwork is nonnegative.  (Ordinal must be a
00548       // signed type, as we explain above, so this test should never
00549       // signal any unsigned-to-signed conversions from the compiler.
00550       // If it does, you're probably using the wrong Ordinal type.
00551       TEST_FOR_EXCEPTION(svd_lwork < 0, std::logic_error,
00552        prefix << "a negative workspace array length (LWORK)"
00553        " = " << svd_lwork << postfix);
00554     }
00555     // Allocate workspace for LAPACK's SVD routine.
00556     std::vector<Scalar> svd_work (svd_lwork);
00557 
00558     // Compute SVD $B := U \Sigma V^*$.  B is overwritten, which is
00559     // why we copied R into B (so that we don't overwrite R if R is
00560     // full rank).
00561     lapack.GESVD ("A", "A", ncols, ncols, B.get(), B.lda(), 
00562       &singular_values[0], U_view.get(), U_view.lda(), 
00563       VT.get(), VT.lda(), &svd_work[0], svd_lwork, 
00564       &svd_rwork[0], &svd_info);
00565     //
00566     // Compute the numerical rank of B, using the given relative
00567     // tolerance and the computed singular values.  GESVD computes
00568     // singular values in decreasing order and they are all
00569     // nonnegative.  We know by now that ncols > 0.
00570     //
00571     // The tolerance "tol" is relative to the largest singular
00572     // value, which is the 2-norm of the matrix.
00573     const magnitude_type absolute_tolerance = tol * singular_values[0];
00574     Ordinal rank = ncols; // "innocent unless proven guilty"
00575     for (Ordinal k = 0; k < ncols; ++k)
00576       // First branch of the IF ensures correctness even if all the
00577       // singular values are zero and the absolute tolerance is
00578       // zero.  Recall that LAPACK sorts the singular values in
00579       // decreasing order.
00580       if (singular_values[k] == STM::zero() || 
00581     singular_values[k] < absolute_tolerance)
00582   {
00583     rank = k;
00584     break;
00585   }
00586     // Don't modify Q or R, if R is full rank.
00587     if (rank < ncols)
00588       { //
00589   // R is not full rank.  
00590   //
00591   // 1. Compute \f$R := \Sigma V^*\f$.
00592   // 2. Return rank (0 <= rank < ncols).
00593   //
00594   // Compute R := \Sigma VT.  \Sigma is diagonal so we apply it
00595   // column by column (normally one would think of this as row by
00596   // row, but this "Hadamard product" formulation iterates more
00597   // efficiently over VT).  
00598   //
00599   // After this computation, R may no longer be upper triangular.
00600   // R may be zero if all the singular values are zero, but we
00601   // don't need to check for this case; it's rare in practice, and
00602   // the computations below will be correct regardless.
00603   for (Ordinal j = 0; j < ncols; ++j)
00604     {
00605       const Scalar* const VT_j = &VT(0,j);
00606       Scalar* const R_j = &R_view(0,j);
00607 
00608       for (Ordinal i = 0; i < ncols; ++i)
00609         R_j[i] = singular_values[i] * VT_j[i];
00610     }
00611       }
00612     return rank;
00613   }
00614 
00615   template<class Ordinal, class Scalar, class FactorOutputType>
00616   Ordinal
00617   NodeTsqr<Ordinal, Scalar, FactorOutputType>::
00618   reveal_rank (const Ordinal nrows,
00619          const Ordinal ncols,
00620          Scalar Q[],
00621          const Ordinal ldq,
00622          Scalar R[],
00623          const Ordinal ldr,
00624          const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
00625          const bool contiguousCacheBlocks) const
00626   {
00627     // Take the easy exit if available.
00628     if (ncols == 0)
00629       return 0;
00630     // Matrix to hold the left singular vectors of the R factor.
00631     Matrix<Ordinal, Scalar> U (ncols, ncols, Scalar(0));
00632     // Compute numerical rank of the R factor using the SVD.
00633     // Store the left singular vectors in U.
00634     const Ordinal rank = 
00635       reveal_R_rank (ncols, R, ldr, U.get(), U.ldu(), tol);
00636 
00637     // If R is full rank, we're done.  Otherwise, reveal_R_rank()
00638     // already computed the SVD \f$R = U \Sigma V^*\f$ of (the
00639     // input) R, and overwrote R with \f$\Sigma V^*\f$.  Now, we
00640     // compute \f$Q := Q \cdot U\f$, respecting cache blocks of Q.
00641     if (rank < ncols)
00642       Q_times_B (nrows, ncols, Q, ldq, U.get(), U.lda(), 
00643      contiguousCacheBlocks);
00644     return rank;
00645   }
00646 
00647 } // namespace TSQR
00648 
00649 
00650 #endif // __TSQR_Tsqr_NodeTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends