Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Tsqr_NodeTsqr.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00045 #ifndef __TSQR_Tsqr_NodeTsqr_hpp
00046 #define __TSQR_Tsqr_NodeTsqr_hpp
00047 
00048 #include <Tsqr_ApplyType.hpp>
00049 #include <Tsqr_Lapack.hpp>
00050 #include <Tsqr_Matrix.hpp>
00051 
00052 #include <Teuchos_Describable.hpp>
00053 #include <Teuchos_ScalarTraits.hpp>
00054 #include <Teuchos_TypeNameTraits.hpp>
00055 
00056 #include <vector>
00057 
00058 namespace TSQR {
00059 
00094   template<class Ordinal, class Scalar, class FactorOutputType>
00095   class NodeTsqr : public Teuchos::Describable {
00096   public:
00097     typedef Ordinal ordinal_type;
00098     typedef Scalar scalar_type;
00099     typedef FactorOutputType factor_output_type;
00100 
00102     NodeTsqr() {}
00103 
00105     virtual ~NodeTsqr() {}
00106 
00114     virtual bool ready() const = 0;
00115 
00120     virtual size_t TEUCHOS_DEPRECATED cache_block_size() const = 0;
00121 
00123     virtual size_t cache_size_hint() const = 0;
00124 
00133     virtual std::string description () const {
00134       using Teuchos::TypeNameTraits;
00135       std::ostringstream os;
00136       os << "NodeTsqr<Ordinal=" << TypeNameTraits<Ordinal>::name()
00137    << ", Scalar=" << TypeNameTraits<Scalar>::name()
00138    << ", ...>: Intranode Tall Skinny QR (TSQR), with cache size hint " 
00139    << cache_size_hint();
00140       return os.str();
00141     }
00142 
00174     virtual factor_output_type
00175     factor (const Ordinal nrows,
00176       const Ordinal ncols, 
00177       Scalar A[],
00178       const Ordinal lda,
00179       Scalar R[],
00180       const Ordinal ldr,
00181       const bool contiguousCacheBlocks) const = 0;
00182 
00209     virtual void
00210     apply (const ApplyType& applyType,
00211      const Ordinal nrows,
00212      const Ordinal ncols_Q,
00213      const Scalar Q[],
00214      const Ordinal ldq,
00215      const FactorOutputType& factorOutput,
00216      const Ordinal ncols_C,
00217      Scalar C[],
00218      const Ordinal ldc,
00219      const bool contiguousCacheBlocks) const = 0;
00220 
00249     virtual void
00250     explicit_Q (const Ordinal nrows,
00251     const Ordinal ncols_Q,
00252     const Scalar Q[],
00253     const Ordinal ldq,
00254     const factor_output_type& factorOutput,
00255     const Ordinal ncols_C,
00256     Scalar C[],
00257     const Ordinal ldc,
00258     const bool contiguousCacheBlocks) const = 0;
00259 
00270     virtual void
00271     cache_block (const Ordinal nrows,
00272      const Ordinal ncols, 
00273      Scalar A_out[],
00274      const Scalar A_in[],
00275      const Ordinal lda_in) const = 0;
00276 
00293     virtual void
00294     un_cache_block (const Ordinal nrows,
00295         const Ordinal ncols,
00296         Scalar A_out[],
00297         const Ordinal lda_out,        
00298         const Scalar A_in[]) const = 0;
00299 
00304     virtual void
00305     Q_times_B (const Ordinal nrows,
00306          const Ordinal ncols,
00307          Scalar Q[],
00308          const Ordinal ldq,
00309          const Scalar B[],
00310          const Ordinal ldb,
00311          const bool contiguousCacheBlocks) const = 0;
00312 
00325     virtual void
00326     fill_with_zeros (const Ordinal nrows,
00327          const Ordinal ncols,
00328          Scalar A[],
00329          const Ordinal lda, 
00330          const bool contiguousCacheBlocks) const = 0;
00331     
00332   protected:
00333 
00350     virtual ConstMatView<Ordinal, Scalar>
00351     const_top_block (const ConstMatView<Ordinal, Scalar>& C,
00352          const bool contiguousCacheBlocks) const = 0;
00353 
00354   public:
00355 
00376     template<class MatrixViewType>
00377     MatrixViewType
00378     top_block (const MatrixViewType& C, 
00379          const bool contiguous_cache_blocks) const 
00380     {
00381       // The *_top_block() methods don't actually modify the data, so
00382       // it's safe to handle the matrix's data as const within this
00383       // method.  The only cast from const to nonconst may be in the
00384       // return value, but there it's legitimate since we're just
00385       // using the same constness as C has.
00386       ConstMatView<Ordinal, Scalar> C_view (C.nrows(), C.ncols(), 
00387               C.get(), C.lda());
00388       ConstMatView<Ordinal, Scalar> C_top = 
00389   const_top_block (C_view, contiguous_cache_blocks);
00390       TEUCHOS_TEST_FOR_EXCEPTION(C_top.nrows() < C_top.ncols(), std::logic_error,
00391        "The subclass of NodeTsqr has a bug in const_top_block"
00392        "(); it returned a block with fewer rows than columns "
00393        "(" << C_top.nrows() << " rows and " << C_top.ncols() 
00394        << " columns).  Please report this bug to the Kokkos "
00395        "developers.");
00396       typedef typename MatrixViewType::pointer_type ptr_type;
00397       return MatrixViewType (C_top.nrows(), C_top.ncols(), 
00398            const_cast<ptr_type> (C_top.get()), 
00399            C_top.lda());
00400     }
00401 
00414     virtual bool 
00415     QR_produces_R_factor_with_nonnegative_diagonal () const = 0;
00416 
00438     Ordinal
00439     reveal_R_rank (const Ordinal ncols,
00440        Scalar R[],
00441        const Ordinal ldr,
00442        Scalar U[],
00443        const Ordinal ldu,
00444        const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) const;
00445 
00457     Ordinal
00458     reveal_rank (const Ordinal nrows,
00459      const Ordinal ncols,
00460      Scalar Q[],
00461      const Ordinal ldq,
00462      Scalar R[],
00463      const Ordinal ldr,
00464      const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
00465      const bool contiguousCacheBlocks) const;
00466   };
00467 
00468 
00469   template<class Ordinal, class Scalar, class FactorOutputType>
00470   Ordinal
00471   NodeTsqr<Ordinal, Scalar, FactorOutputType>::
00472   reveal_R_rank (const Ordinal ncols,
00473      Scalar R[],
00474      const Ordinal ldr,
00475      Scalar U[],
00476      const Ordinal ldu,
00477      const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol) const
00478   {
00479     using Teuchos::TypeNameTraits;
00480     typedef Teuchos::ScalarTraits<scalar_type> STS;
00481     typedef typename STS::magnitudeType magnitude_type;
00482     typedef Teuchos::ScalarTraits<magnitude_type> STM;
00483 
00484     TEUCHOS_TEST_FOR_EXCEPTION(tol < 0, std::invalid_argument,
00485            "In NodeTsqr::reveal_R_rank: numerical rank tolerance "
00486            "(tol = " << tol << ") is negative.");
00487     TEUCHOS_TEST_FOR_EXCEPTION(ncols < 0, std::invalid_argument,
00488            "In NodeTsqr::reveal_R_rank: number of columns "
00489            "(ncols = " << ncols << ") is negative.");
00490     TEUCHOS_TEST_FOR_EXCEPTION(ldr < ncols, std::invalid_argument,
00491            "In NodeTsqr::reveal_R_ank: stride of R (ldr = " 
00492            << ldr << ") is less than the number of columns "
00493            "(ncols = " << ncols << ").");
00494     TEUCHOS_TEST_FOR_EXCEPTION(ldu < ncols, std::invalid_argument,
00495            "In NodeTsqr::reveal_R_rank: stride of U (ldu = " 
00496            << ldu << ") is less than the number of columns "
00497            "(ncols = " << ncols << ")");
00498 
00499     // Zero columns always means rank zero.
00500     if (ncols == 0)
00501       return 0;
00502     //
00503     // Compute the SVD (singular value decomposition) of the R
00504     // factor, using LAPACK's GESVD routine.  We do so in a deep
00505     // copy (B) because LAPACK overwrites the input.  If the R
00506     // factor is full rank (expected to be the common case), we need
00507     // to leave it alone (so that it stays upper triangular).
00508     //
00509     LAPACK<Ordinal, Scalar> lapack;
00510     MatView<Ordinal, Scalar> R_view (ncols, ncols, R, ldr);
00511     Matrix<Ordinal, Scalar> B (R_view); // B := R (deep copy)
00512     MatView<Ordinal, Scalar> U_view (ncols, ncols, U, ldu);
00513     Matrix<Ordinal, Scalar> VT (ncols, ncols, Scalar(0));
00514 
00515     // Set up workspace for the SVD.
00516     std::vector<magnitude_type> svd_rwork (5*ncols);
00517     std::vector<magnitude_type> singular_values (ncols);
00518     Ordinal svd_lwork = -1; // -1 for LWORK query; will be changed
00519     int svd_info = 0;
00520 
00521     // LAPACK workspace ("LWORK") query for SVD.  The workspace
00522     // ("WORK") array is always of Scalar type, even in the complex
00523     // case.
00524     {
00525       // Exception messages in this scope all start with this.
00526       const char prefix[] = "In NodeTsqr::reveal_R_rank: LAPACK SVD (_GESVD) "
00527   "workspace query returned ";
00528       // std::logic_error messages in this scope all end with this.
00529       const char postfix[] = ".  Please report this bug to the Kokkos "
00530   "developers.";
00531 
00532       Scalar svd_lwork_scalar = Teuchos::ScalarTraits<Scalar>::zero();
00533       lapack.GESVD ("A", "A", ncols, ncols, B.get(), B.lda(), 
00534         &singular_values[0], U_view.get(), U_view.lda(), 
00535         VT.get(), VT.lda(), &svd_lwork_scalar, svd_lwork, 
00536         &svd_rwork[0], &svd_info);
00537       // Failure of the LAPACK workspace query is a logic error (a
00538       // bug) because we have already validated the matrix
00539       // dimensions above.
00540       TEUCHOS_TEST_FOR_EXCEPTION(svd_info != 0, std::logic_error,
00541        prefix << "a nonzero INFO = " << svd_info 
00542        << postfix);
00543       // LAPACK returns the workspace array length as a Scalar.  We
00544       // have to convert it back to an Ordinal in order to allocate
00545       // the workspace array and pass it in to LAPACK as the LWORK
00546       // argument.  Ordinal definitely must be a signed type, since
00547       // LWORK = -1 indicates a workspace query.  If Scalar is
00548       // complex, LAPACK had better return something with a zero
00549       // imaginary part, since I can't allocate imaginary amounts of
00550       // memory!  (Take the real part to avoid rounding error, since
00551       // magnitude() may be implemented using a square root always...)
00552       svd_lwork = static_cast<Ordinal> (Teuchos::ScalarTraits<Scalar>::real (svd_lwork_scalar));
00553 
00554       // LAPACK should always return an LWORK that fits in Ordinal,
00555       // but it's a good idea to check anyway.  We do so by checking
00556       // whether casting back from Ordinal to Scalar gives the same
00557       // original Scalar result.  This should work unless Scalar and
00558       // Ordinal are user-defined types with weird definitions of
00559       // the type casts.
00560       TEUCHOS_TEST_FOR_EXCEPTION(static_cast<Scalar> (svd_lwork) != svd_lwork_scalar,
00561        std::logic_error,
00562        prefix << "a workspace array length (LWORK) of type "
00563        "Scalar=" << TypeNameTraits<Scalar>::name() 
00564        << " that does not fit in an Ordinal=" 
00565        << TypeNameTraits<Ordinal>::name() << " type.  "
00566        "As a Scalar, LWORK=" << svd_lwork_scalar 
00567        << ", but cast to Ordinal, LWORK=" << svd_lwork 
00568        << postfix);
00569       // Make sure svd_lwork is nonnegative.  (Ordinal must be a
00570       // signed type, as we explain above, so this test should never
00571       // signal any unsigned-to-signed conversions from the compiler.
00572       // If it does, you're probably using the wrong Ordinal type.
00573       TEUCHOS_TEST_FOR_EXCEPTION(svd_lwork < 0, std::logic_error,
00574        prefix << "a negative workspace array length (LWORK)"
00575        " = " << svd_lwork << postfix);
00576     }
00577     // Allocate workspace for LAPACK's SVD routine.
00578     std::vector<Scalar> svd_work (svd_lwork);
00579 
00580     // Compute SVD $B := U \Sigma V^*$.  B is overwritten, which is
00581     // why we copied R into B (so that we don't overwrite R if R is
00582     // full rank).
00583     lapack.GESVD ("A", "A", ncols, ncols, B.get(), B.lda(), 
00584       &singular_values[0], U_view.get(), U_view.lda(), 
00585       VT.get(), VT.lda(), &svd_work[0], svd_lwork, 
00586       &svd_rwork[0], &svd_info);
00587     //
00588     // Compute the numerical rank of B, using the given relative
00589     // tolerance and the computed singular values.  GESVD computes
00590     // singular values in decreasing order and they are all
00591     // nonnegative.  We know by now that ncols > 0.
00592     //
00593     // The tolerance "tol" is relative to the largest singular
00594     // value, which is the 2-norm of the matrix.
00595     const magnitude_type absolute_tolerance = tol * singular_values[0];
00596     Ordinal rank = ncols; // "innocent unless proven guilty"
00597     for (Ordinal k = 0; k < ncols; ++k)
00598       // First branch of the IF ensures correctness even if all the
00599       // singular values are zero and the absolute tolerance is
00600       // zero.  Recall that LAPACK sorts the singular values in
00601       // decreasing order.
00602       if (singular_values[k] == STM::zero() || 
00603     singular_values[k] < absolute_tolerance)
00604   {
00605     rank = k;
00606     break;
00607   }
00608     // Don't modify Q or R, if R is full rank.
00609     if (rank < ncols)
00610       { //
00611   // R is not full rank.  
00612   //
00613   // 1. Compute \f$R := \Sigma V^*\f$.
00614   // 2. Return rank (0 <= rank < ncols).
00615   //
00616   // Compute R := \Sigma VT.  \Sigma is diagonal so we apply it
00617   // column by column (normally one would think of this as row by
00618   // row, but this "Hadamard product" formulation iterates more
00619   // efficiently over VT).  
00620   //
00621   // After this computation, R may no longer be upper triangular.
00622   // R may be zero if all the singular values are zero, but we
00623   // don't need to check for this case; it's rare in practice, and
00624   // the computations below will be correct regardless.
00625   for (Ordinal j = 0; j < ncols; ++j)
00626     {
00627       const Scalar* const VT_j = &VT(0,j);
00628       Scalar* const R_j = &R_view(0,j);
00629 
00630       for (Ordinal i = 0; i < ncols; ++i)
00631         R_j[i] = singular_values[i] * VT_j[i];
00632     }
00633       }
00634     return rank;
00635   }
00636 
00637   template<class Ordinal, class Scalar, class FactorOutputType>
00638   Ordinal
00639   NodeTsqr<Ordinal, Scalar, FactorOutputType>::
00640   reveal_rank (const Ordinal nrows,
00641          const Ordinal ncols,
00642          Scalar Q[],
00643          const Ordinal ldq,
00644          Scalar R[],
00645          const Ordinal ldr,
00646          const typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
00647          const bool contiguousCacheBlocks) const
00648   {
00649     // Take the easy exit if available.
00650     if (ncols == 0)
00651       return 0;
00652     // Matrix to hold the left singular vectors of the R factor.
00653     Matrix<Ordinal, Scalar> U (ncols, ncols, Scalar(0));
00654     // Compute numerical rank of the R factor using the SVD.
00655     // Store the left singular vectors in U.
00656     const Ordinal rank = 
00657       reveal_R_rank (ncols, R, ldr, U.get(), U.ldu(), tol);
00658 
00659     // If R is full rank, we're done.  Otherwise, reveal_R_rank()
00660     // already computed the SVD \f$R = U \Sigma V^*\f$ of (the
00661     // input) R, and overwrote R with \f$\Sigma V^*\f$.  Now, we
00662     // compute \f$Q := Q \cdot U\f$, respecting cache blocks of Q.
00663     if (rank < ncols)
00664       Q_times_B (nrows, ncols, Q, ldq, U.get(), U.lda(), 
00665      contiguousCacheBlocks);
00666     return rank;
00667   }
00668 
00669 } // namespace TSQR
00670 
00671 
00672 #endif // __TSQR_Tsqr_NodeTsqr_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends