Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Tsqr_CombineDefault.hpp
Go to the documentation of this file.
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00045 #ifndef __TSQR_CombineDefault_hpp
00046 #define __TSQR_CombineDefault_hpp
00047 
00048 #include <Teuchos_ScalarTraits.hpp>
00049 
00050 #include <Tsqr_ApplyType.hpp>
00051 #include <Tsqr_Lapack.hpp>
00052 #include <Tsqr_Matrix.hpp>
00053 
00054 #include <algorithm>
00055 #include <sstream>
00056 #include <stdexcept>
00057 
00058 
00059 namespace TSQR {
00060 
00072   template<class Ordinal, class Scalar>
00073   class CombineDefault {
00074   private:
00075     typedef LAPACK< Ordinal, Scalar > lapack_type;
00076 
00077   public:
00078     typedef Scalar scalar_type;
00079     typedef typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type;
00080     typedef Ordinal ordinal_type;
00081 
00082     CombineDefault () {}
00083 
00092     static bool QR_produces_R_factor_with_nonnegative_diagonal()
00093     { 
00094       return lapack_type::QR_produces_R_factor_with_nonnegative_diagonal();
00095     }
00096 
00097     void
00098     factor_first (const Ordinal nrows,
00099       const Ordinal ncols,
00100       Scalar A[],
00101       const Ordinal lda,
00102       Scalar tau[],
00103       Scalar work[])
00104     {
00105       // info must be an int, not a LocalOrdinal, since LAPACK
00106       // routines always (???) use int for the INFO output argument,
00107       // whether or not they were built with 64-bit integer index
00108       // support.
00109       int info = 0;
00110       lapack_.GEQR2 (nrows, ncols, A, lda, tau, work, &info);
00111       if (info != 0)
00112   {
00113     std::ostringstream os;
00114     os << "TSQR::CombineDefault::factor_first(): LAPACK\'s "
00115        << "GEQR2 failed with INFO = " << info;
00116     throw std::logic_error (os.str());
00117   }
00118     }
00119 
00120     void
00121     apply_first (const ApplyType& applyType,
00122      const Ordinal nrows,
00123      const Ordinal ncols_C,
00124      const Ordinal ncols_A,
00125      const Scalar A[],
00126      const Ordinal lda,
00127      const Scalar tau[],
00128      Scalar C[],
00129      const Ordinal ldc,
00130      Scalar work[])
00131     {
00132       int info = 0;
00133       // LAPACK has the nice feature that it only reads the first
00134       // letter of input strings that specify things like which side
00135       // to which to apply the operator, or whether to apply the
00136       // transpose.  That means we can make the strings more verbose,
00137       // as in "Left" here for the SIDE parameter.
00138       lapack_.ORM2R ("Left", applyType.toString().c_str(), 
00139          nrows, ncols_C, ncols_A,
00140          A, lda, tau, 
00141          C, ldc, work, &info);
00142       if (info != 0)
00143   {
00144     std::ostringstream os;
00145     os << "TSQR::CombineDefault::apply_first(): LAPACK\'s "
00146        << "ORM2R failed with INFO = " << info;
00147     throw std::logic_error (os.str());
00148   }
00149     }
00150 
00151     void
00152     apply_inner (const ApplyType& apply_type,
00153      const Ordinal m,
00154      const Ordinal ncols_C,
00155      const Ordinal ncols_Q,
00156      const Scalar A[],
00157      const Ordinal lda,
00158      const Scalar tau[],
00159      Scalar C_top[],
00160      const Ordinal ldc_top,
00161      Scalar C_bot[],
00162      const Ordinal ldc_bot,
00163      Scalar work[])
00164     {
00165       typedef ConstMatView< Ordinal, Scalar > const_matview_type;
00166       typedef MatView< Ordinal, Scalar > matview_type;
00167       const Ordinal numRows = m + ncols_Q;
00168 
00169       A_buf_.reshape (numRows, ncols_Q);
00170       A_buf_.fill (Scalar(0));
00171       const_matview_type A_bot (m, ncols_Q, A, lda);
00172       matview_type A_buf_bot (m, ncols_Q, &A_buf_(ncols_Q, 0), A_buf_.lda());
00173       A_buf_bot.copy (A_bot);
00174 
00175       C_buf_.reshape (numRows, ncols_C);
00176       C_buf_.fill (Scalar(0));
00177       matview_type C_buf_top (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda());
00178       matview_type C_buf_bot (m, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda());
00179       matview_type C_top_view (ncols_Q, ncols_C, C_top, ldc_top);
00180       matview_type C_bot_view (m, ncols_C, C_bot, ldc_bot);
00181       C_buf_top.copy (C_top_view);
00182       C_buf_bot.copy (C_bot_view);
00183 
00184       int info = 0;
00185       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00186          numRows, ncols_C, ncols_Q,
00187          A_buf_.get(), A_buf_.lda(), tau,
00188          C_buf_.get(), C_buf_.lda(),
00189          work, &info);
00190       if (info != 0)
00191   {
00192     std::ostringstream os;
00193     os << "TSQR::CombineDefault::apply_inner(): LAPACK\'s "
00194        << "ORM2R failed with INFO = " << info;
00195     throw std::logic_error (os.str());
00196   }
00197       // Copy back the results.
00198       C_top_view.copy (C_buf_top);
00199       C_bot_view.copy (C_buf_bot);
00200     }
00201 
00202     void
00203     factor_inner (const Ordinal m,
00204       const Ordinal n,
00205       Scalar R[],
00206       const Ordinal ldr,
00207       Scalar A[],
00208       const Ordinal lda,
00209       Scalar tau[],
00210       Scalar work[])
00211     {
00212       const Ordinal numRows = m + n;
00213 
00214       A_buf_.reshape (numRows, n);
00215       A_buf_.fill (Scalar(0));
00216       // R might be a view of the upper triangle of a cache block, but
00217       // we only want to include the upper triangle in the
00218       // factorization.  Thus, only copy the upper triangle of R into
00219       // the appropriate place in the buffer.
00220       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R, ldr);
00221       copy_matrix (m, n, &A_buf_(n, 0), A_buf_.lda(), A, lda);
00222 
00223       int info = 0;
00224       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00225       if (info != 0)
00226   throw std::logic_error ("TSQR::CombineDefault: GEQR2 failed");
00227 
00228       // Copy back the results.  R might be a view of the upper
00229       // triangle of a cache block, so only copy into the upper
00230       // triangle of R.
00231       copy_upper_triangle (n, n, R, ldr, &A_buf_(0, 0), A_buf_.lda());
00232       copy_matrix (m, n, A, lda, &A_buf_(n, 0), A_buf_.lda());
00233     }
00234 
00235     void
00236     factor_pair (const Ordinal n,
00237      Scalar R_top[],
00238      const Ordinal ldr_top,
00239      Scalar R_bot[],
00240      const Ordinal ldr_bot,
00241      Scalar tau[],
00242      Scalar work[])
00243     {
00244       const Ordinal numRows = Ordinal(2) * n;
00245 
00246       A_buf_.reshape (numRows, n);
00247       A_buf_.fill (Scalar(0));
00248       // Copy the inputs into the compute buffer.  Only touch the
00249       // upper triangles of R_top and R_bot, since they each may be
00250       // views of some cache block (where the strict lower triangle
00251       // contains things we don't want to include in the
00252       // factorization).
00253       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R_top, ldr_top);
00254       copy_upper_triangle (n, n, &A_buf_(n, 0), A_buf_.lda(), R_bot, ldr_bot);
00255 
00256       int info = 0;
00257       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00258       if (info != 0)
00259   {
00260     std::ostringstream os;
00261     os << "TSQR::CombineDefault::factor_pair(): "
00262        << "GEQR2 failed with INFO = " << info;
00263     throw std::logic_error (os.str());
00264   }
00265 
00266       // Copy back the results.  Only read the upper triangles of the
00267       // two n by n row blocks of A_buf_ (this means we don't have to
00268       // zero out the strict lower triangles), and only touch the
00269       // upper triangles of R_top and R_bot.
00270       copy_upper_triangle (n, n, R_top, ldr_top, &A_buf_(0, 0), A_buf_.lda());
00271       copy_upper_triangle (n, n, R_bot, ldr_bot, &A_buf_(n, 0), A_buf_.lda());
00272     }
00273     
00274     void
00275     apply_pair (const ApplyType& apply_type,
00276     const Ordinal ncols_C, 
00277     const Ordinal ncols_Q, 
00278     const Scalar R_bot[], 
00279     const Ordinal ldr_bot,
00280     const Scalar tau[], 
00281     Scalar C_top[], 
00282     const Ordinal ldc_top, 
00283     Scalar C_bot[], 
00284     const Ordinal ldc_bot, 
00285     Scalar work[]) 
00286     {
00287       const Ordinal numRows = Ordinal(2) * ncols_Q;
00288 
00289       A_buf_.reshape (numRows, ncols_Q);
00290       A_buf_.fill (Scalar(0));
00291       copy_upper_triangle (ncols_Q, ncols_Q, 
00292          &A_buf_(ncols_Q, 0), A_buf_.lda(), 
00293          R_bot, ldr_bot);
00294       C_buf_.reshape (numRows, ncols_C);
00295       copy_matrix (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda(), C_top, ldc_top);
00296       copy_matrix (ncols_Q, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda(), C_bot, ldc_bot);
00297 
00298       int info = 0;
00299       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00300          numRows, ncols_C, ncols_Q,
00301          A_buf_.get(), A_buf_.lda(), tau,
00302          C_buf_.get(), C_buf_.lda(),
00303          work, &info);
00304       if (info != 0)
00305   throw std::logic_error ("TSQR::CombineDefault: ORM2R failed");
00306 
00307       // Copy back the results.
00308       copy_matrix (ncols_Q, ncols_C, C_top, ldc_top, &C_buf_(0, 0), C_buf_.lda());
00309       copy_matrix (ncols_Q, ncols_C, C_bot, ldc_bot, &C_buf_(ncols_Q, 0), C_buf_.lda());
00310     }
00311 
00312   private:
00313     LAPACK< Ordinal, Scalar > lapack_;
00314     Matrix< Ordinal, Scalar > A_buf_;
00315     Matrix< Ordinal, Scalar > C_buf_;
00316   };
00317 
00318 
00319 } // namespace TSQR
00320 
00321 #endif // __TSQR_CombineDefault_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends