Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Tsqr_CombineDefault.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00032 #ifndef __TSQR_CombineDefault_hpp
00033 #define __TSQR_CombineDefault_hpp
00034 
00035 #include <Teuchos_ScalarTraits.hpp>
00036 
00037 #include <Tsqr_ApplyType.hpp>
00038 #include <Tsqr_Lapack.hpp>
00039 #include <Tsqr_Matrix.hpp>
00040 
00041 #include <algorithm>
00042 #include <sstream>
00043 #include <stdexcept>
00044 
00045 
00046 namespace TSQR {
00047 
00059   template<class Ordinal, class Scalar>
00060   class CombineDefault {
00061   private:
00062     typedef LAPACK< Ordinal, Scalar > lapack_type;
00063 
00064   public:
00065     typedef Scalar scalar_type;
00066     typedef typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type;
00067     typedef Ordinal ordinal_type;
00068 
00069     CombineDefault () {}
00070 
00079     static bool QR_produces_R_factor_with_nonnegative_diagonal()
00080     { 
00081       return lapack_type::QR_produces_R_factor_with_nonnegative_diagonal();
00082     }
00083 
00084     void
00085     factor_first (const Ordinal nrows,
00086       const Ordinal ncols,
00087       Scalar A[],
00088       const Ordinal lda,
00089       Scalar tau[],
00090       Scalar work[])
00091     {
00092       // info must be an int, not a LocalOrdinal, since LAPACK
00093       // routines always (???) use int for the INFO output argument,
00094       // whether or not they were built with 64-bit integer index
00095       // support.
00096       int info = 0;
00097       lapack_.GEQR2 (nrows, ncols, A, lda, tau, work, &info);
00098       if (info != 0)
00099   {
00100     std::ostringstream os;
00101     os << "TSQR::CombineDefault::factor_first(): LAPACK\'s "
00102        << "GEQR2 failed with INFO = " << info;
00103     throw std::logic_error (os.str());
00104   }
00105     }
00106 
00107     void
00108     apply_first (const ApplyType& applyType,
00109      const Ordinal nrows,
00110      const Ordinal ncols_C,
00111      const Ordinal ncols_A,
00112      const Scalar A[],
00113      const Ordinal lda,
00114      const Scalar tau[],
00115      Scalar C[],
00116      const Ordinal ldc,
00117      Scalar work[])
00118     {
00119       int info = 0;
00120       // LAPACK has the nice feature that it only reads the first
00121       // letter of input strings that specify things like which side
00122       // to which to apply the operator, or whether to apply the
00123       // transpose.  That means we can make the strings more verbose,
00124       // as in "Left" here for the SIDE parameter.
00125       lapack_.ORM2R ("Left", applyType.toString().c_str(), 
00126          nrows, ncols_C, ncols_A,
00127          A, lda, tau, 
00128          C, ldc, work, &info);
00129       if (info != 0)
00130   {
00131     std::ostringstream os;
00132     os << "TSQR::CombineDefault::apply_first(): LAPACK\'s "
00133        << "ORM2R failed with INFO = " << info;
00134     throw std::logic_error (os.str());
00135   }
00136     }
00137 
00138     void
00139     apply_inner (const ApplyType& apply_type,
00140      const Ordinal m,
00141      const Ordinal ncols_C,
00142      const Ordinal ncols_Q,
00143      const Scalar A[],
00144      const Ordinal lda,
00145      const Scalar tau[],
00146      Scalar C_top[],
00147      const Ordinal ldc_top,
00148      Scalar C_bot[],
00149      const Ordinal ldc_bot,
00150      Scalar work[])
00151     {
00152       typedef ConstMatView< Ordinal, Scalar > const_matview_type;
00153       typedef MatView< Ordinal, Scalar > matview_type;
00154       const Ordinal numRows = m + ncols_Q;
00155 
00156       A_buf_.reshape (numRows, ncols_Q);
00157       A_buf_.fill (Scalar(0));
00158       const_matview_type A_bot (m, ncols_Q, A, lda);
00159       matview_type A_buf_bot (m, ncols_Q, &A_buf_(ncols_Q, 0), A_buf_.lda());
00160       A_buf_bot.copy (A_bot);
00161 
00162       C_buf_.reshape (numRows, ncols_C);
00163       C_buf_.fill (Scalar(0));
00164       matview_type C_buf_top (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda());
00165       matview_type C_buf_bot (m, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda());
00166       matview_type C_top_view (ncols_Q, ncols_C, C_top, ldc_top);
00167       matview_type C_bot_view (m, ncols_C, C_bot, ldc_bot);
00168       C_buf_top.copy (C_top_view);
00169       C_buf_bot.copy (C_bot_view);
00170 
00171       int info = 0;
00172       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00173          numRows, ncols_C, ncols_Q,
00174          A_buf_.get(), A_buf_.lda(), tau,
00175          C_buf_.get(), C_buf_.lda(),
00176          work, &info);
00177       if (info != 0)
00178   {
00179     std::ostringstream os;
00180     os << "TSQR::CombineDefault::apply_inner(): LAPACK\'s "
00181        << "ORM2R failed with INFO = " << info;
00182     throw std::logic_error (os.str());
00183   }
00184       // Copy back the results.
00185       C_top_view.copy (C_buf_top);
00186       C_bot_view.copy (C_buf_bot);
00187     }
00188 
00189     void
00190     factor_inner (const Ordinal m,
00191       const Ordinal n,
00192       Scalar R[],
00193       const Ordinal ldr,
00194       Scalar A[],
00195       const Ordinal lda,
00196       Scalar tau[],
00197       Scalar work[])
00198     {
00199       const Ordinal numRows = m + n;
00200 
00201       A_buf_.reshape (numRows, n);
00202       A_buf_.fill (Scalar(0));
00203       // R might be a view of the upper triangle of a cache block, but
00204       // we only want to include the upper triangle in the
00205       // factorization.  Thus, only copy the upper triangle of R into
00206       // the appropriate place in the buffer.
00207       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R, ldr);
00208       copy_matrix (m, n, &A_buf_(n, 0), A_buf_.lda(), A, lda);
00209 
00210       int info = 0;
00211       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00212       if (info != 0)
00213   throw std::logic_error ("TSQR::CombineDefault: GEQR2 failed");
00214 
00215       // Copy back the results.  R might be a view of the upper
00216       // triangle of a cache block, so only copy into the upper
00217       // triangle of R.
00218       copy_upper_triangle (n, n, R, ldr, &A_buf_(0, 0), A_buf_.lda());
00219       copy_matrix (m, n, A, lda, &A_buf_(n, 0), A_buf_.lda());
00220     }
00221 
00222     void
00223     factor_pair (const Ordinal n,
00224      Scalar R_top[],
00225      const Ordinal ldr_top,
00226      Scalar R_bot[],
00227      const Ordinal ldr_bot,
00228      Scalar tau[],
00229      Scalar work[])
00230     {
00231       const Ordinal numRows = Ordinal(2) * n;
00232 
00233       A_buf_.reshape (numRows, n);
00234       A_buf_.fill (Scalar(0));
00235       // Copy the inputs into the compute buffer.  Only touch the
00236       // upper triangles of R_top and R_bot, since they each may be
00237       // views of some cache block (where the strict lower triangle
00238       // contains things we don't want to include in the
00239       // factorization).
00240       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R_top, ldr_top);
00241       copy_upper_triangle (n, n, &A_buf_(n, 0), A_buf_.lda(), R_bot, ldr_bot);
00242 
00243       int info = 0;
00244       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00245       if (info != 0)
00246   {
00247     std::ostringstream os;
00248     os << "TSQR::CombineDefault::factor_pair(): "
00249        << "GEQR2 failed with INFO = " << info;
00250     throw std::logic_error (os.str());
00251   }
00252 
00253       // Copy back the results.  Only read the upper triangles of the
00254       // two n by n row blocks of A_buf_ (this means we don't have to
00255       // zero out the strict lower triangles), and only touch the
00256       // upper triangles of R_top and R_bot.
00257       copy_upper_triangle (n, n, R_top, ldr_top, &A_buf_(0, 0), A_buf_.lda());
00258       copy_upper_triangle (n, n, R_bot, ldr_bot, &A_buf_(n, 0), A_buf_.lda());
00259     }
00260     
00261     void
00262     apply_pair (const ApplyType& apply_type,
00263     const Ordinal ncols_C, 
00264     const Ordinal ncols_Q, 
00265     const Scalar R_bot[], 
00266     const Ordinal ldr_bot,
00267     const Scalar tau[], 
00268     Scalar C_top[], 
00269     const Ordinal ldc_top, 
00270     Scalar C_bot[], 
00271     const Ordinal ldc_bot, 
00272     Scalar work[]) 
00273     {
00274       const Ordinal numRows = Ordinal(2) * ncols_Q;
00275 
00276       A_buf_.reshape (numRows, ncols_Q);
00277       A_buf_.fill (Scalar(0));
00278       copy_upper_triangle (ncols_Q, ncols_Q, 
00279          &A_buf_(ncols_Q, 0), A_buf_.lda(), 
00280          R_bot, ldr_bot);
00281       C_buf_.reshape (numRows, ncols_C);
00282       copy_matrix (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda(), C_top, ldc_top);
00283       copy_matrix (ncols_Q, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda(), C_bot, ldc_bot);
00284 
00285       int info = 0;
00286       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00287          numRows, ncols_C, ncols_Q,
00288          A_buf_.get(), A_buf_.lda(), tau,
00289          C_buf_.get(), C_buf_.lda(),
00290          work, &info);
00291       if (info != 0)
00292   throw std::logic_error ("TSQR::CombineDefault: ORM2R failed");
00293 
00294       // Copy back the results.
00295       copy_matrix (ncols_Q, ncols_C, C_top, ldc_top, &C_buf_(0, 0), C_buf_.lda());
00296       copy_matrix (ncols_Q, ncols_C, C_bot, ldc_bot, &C_buf_(ncols_Q, 0), C_buf_.lda());
00297     }
00298 
00299   private:
00300     LAPACK< Ordinal, Scalar > lapack_;
00301     Matrix< Ordinal, Scalar > A_buf_;
00302     Matrix< Ordinal, Scalar > C_buf_;
00303   };
00304 
00305 
00306 } // namespace TSQR
00307 
00308 #endif // __TSQR_CombineDefault_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends