Anasazi Version of the Day
Tsqr_CombineDefault.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_CombineDefault_hpp
00030 #define __TSQR_CombineDefault_hpp
00031 
00032 #include <Tsqr_ApplyType.hpp>
00033 #include <Tsqr_Lapack.hpp>
00034 #include <Tsqr_Matrix.hpp>
00035 #include <Tsqr_ScalarTraits.hpp>
00036 
00037 #include <algorithm>
00038 #include <sstream>
00039 #include <stdexcept>
00040 
00043 
00044 namespace TSQR {
00045 
00057   template< class Ordinal, class Scalar >
00058   class CombineDefault {
00059   private:
00060     typedef LAPACK< Ordinal, Scalar > lapack_type;
00061 
00062   public:
00063     typedef Scalar scalar_type;
00064     typedef typename ScalarTraits< Scalar >::magnitude_type magnitude_type;
00065     typedef Ordinal ordinal_type;
00066 
00067     CombineDefault () {}
00068 
00072     static bool QR_produces_R_factor_with_nonnegative_diagonal()
00073     { 
00074       return lapack_type::QR_produces_R_factor_with_nonnegative_diagonal();
00075     }
00076 
00077     void
00078     factor_first (const Ordinal nrows,
00079       const Ordinal ncols,
00080       Scalar A[],
00081       const Ordinal lda,
00082       Scalar tau[],
00083       Scalar work[])
00084     {
00085       // info must be an int, not a LocalOrdinal, since LAPACK
00086       // routines always (???) use int for the INFO output argument.
00087       int info = 0;
00088       lapack_.GEQR2 (nrows, ncols, A, lda, tau, work, &info);
00089       if (info != 0)
00090   {
00091     std::ostringstream os;
00092     os << "TSQR::CombineDefault::factor_first(): LAPACK\'s "
00093        << "GEQR2 failed with INFO = " << info;
00094     throw std::logic_error (os.str());
00095   }
00096     }
00097 
00098     void
00099     apply_first (const ApplyType& applyType,
00100      const Ordinal nrows,
00101      const Ordinal ncols_C,
00102      const Ordinal ncols_A,
00103      const Scalar A[],
00104      const Ordinal lda,
00105      const Scalar tau[],
00106      Scalar C[],
00107      const Ordinal ldc,
00108      Scalar work[])
00109     {
00110       int info = 0;
00111       // LAPACK has the nice feature that it only reads the first
00112       // letter of input strings that specify things like which side
00113       // to which to apply the operator, or whether to apply the
00114       // transpose.  That means we can make the strings more verbose,
00115       // as in "Left" here for the SIDE parameter.
00116       lapack_.ORM2R ("Left", applyType.toString().c_str(), 
00117          nrows, ncols_C, ncols_A,
00118          A, lda, tau, 
00119          C, ldc, work, &info);
00120       if (info != 0)
00121   {
00122     std::ostringstream os;
00123     os << "TSQR::CombineDefault::apply_first(): LAPACK\'s "
00124        << "ORM2R failed with INFO = " << info;
00125     throw std::logic_error (os.str());
00126   }
00127     }
00128 
00129     void
00130     apply_inner (const ApplyType& apply_type,
00131      const Ordinal m,
00132      const Ordinal ncols_C,
00133      const Ordinal ncols_Q,
00134      const Scalar A[],
00135      const Ordinal lda,
00136      const Scalar tau[],
00137      Scalar C_top[],
00138      const Ordinal ldc_top,
00139      Scalar C_bot[],
00140      const Ordinal ldc_bot,
00141      Scalar work[])
00142     {
00143       typedef ConstMatView< Ordinal, Scalar > const_matview_type;
00144       typedef MatView< Ordinal, Scalar > matview_type;
00145       const Ordinal numRows = m + ncols_Q;
00146 
00147       A_buf_.reshape (numRows, ncols_Q);
00148       A_buf_.fill (Scalar(0));
00149       const_matview_type A_bot (m, ncols_Q, A, lda);
00150       matview_type A_buf_bot (m, ncols_Q, &A_buf_(ncols_Q, 0), A_buf_.lda());
00151       A_buf_bot.copy (A_bot);
00152 
00153       C_buf_.reshape (numRows, ncols_C);
00154       C_buf_.fill (Scalar(0));
00155       matview_type C_buf_top (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda());
00156       matview_type C_buf_bot (m, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda());
00157       matview_type C_top_view (ncols_Q, ncols_C, C_top, ldc_top);
00158       matview_type C_bot_view (m, ncols_C, C_bot, ldc_bot);
00159       C_buf_top.copy (C_top_view);
00160       C_buf_bot.copy (C_bot_view);
00161 
00162       int info = 0;
00163       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00164          numRows, ncols_C, ncols_Q,
00165          A_buf_.get(), A_buf_.lda(), tau,
00166          C_buf_.get(), C_buf_.lda(),
00167          work, &info);
00168       if (info != 0)
00169   {
00170     std::ostringstream os;
00171     os << "TSQR::CombineDefault::apply_inner(): LAPACK\'s "
00172        << "ORM2R failed with INFO = " << info;
00173     throw std::logic_error (os.str());
00174   }
00175       // Copy back the results.
00176       C_top_view.copy (C_buf_top);
00177       C_bot_view.copy (C_buf_bot);
00178     }
00179 
00180     void
00181     factor_inner (const Ordinal m,
00182       const Ordinal n,
00183       Scalar R[],
00184       const Ordinal ldr,
00185       Scalar A[],
00186       const Ordinal lda,
00187       Scalar tau[],
00188       Scalar work[])
00189     {
00190       const Ordinal numRows = m + n;
00191 
00192       A_buf_.reshape (numRows, n);
00193       A_buf_.fill (Scalar(0));
00194       // R might be a view of the upper triangle of a cache block, but
00195       // we only want to include the upper triangle in the
00196       // factorization.  Thus, only copy the upper triangle of R into
00197       // the appropriate place in the buffer.
00198       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R, ldr);
00199       copy_matrix (m, n, &A_buf_(n, 0), A_buf_.lda(), A, lda);
00200 
00201       int info = 0;
00202       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00203       if (info != 0)
00204   throw std::logic_error ("TSQR::CombineDefault: GEQR2 failed");
00205 
00206       // Copy back the results.  R might be a view of the upper
00207       // triangle of a cache block, so only copy into the upper
00208       // triangle of R.
00209       copy_upper_triangle (n, n, R, ldr, &A_buf_(0, 0), A_buf_.lda());
00210       copy_matrix (m, n, A, lda, &A_buf_(n, 0), A_buf_.lda());
00211     }
00212 
00213     void
00214     factor_pair (const Ordinal n,
00215      Scalar R_top[],
00216      const Ordinal ldr_top,
00217      Scalar R_bot[],
00218      const Ordinal ldr_bot,
00219      Scalar tau[],
00220      Scalar work[])
00221     {
00222       const Ordinal numRows = Ordinal(2) * n;
00223 
00224       A_buf_.reshape (numRows, n);
00225       A_buf_.fill (Scalar(0));
00226       // Copy the inputs into the compute buffer.  Only touch the
00227       // upper triangles of R_top and R_bot, since they each may be
00228       // views of some cache block (where the strict lower triangle
00229       // contains things we don't want to include in the
00230       // factorization).
00231       copy_upper_triangle (n, n, &A_buf_(0, 0), A_buf_.lda(), R_top, ldr_top);
00232       copy_upper_triangle (n, n, &A_buf_(n, 0), A_buf_.lda(), R_bot, ldr_bot);
00233 
00234       int info = 0;
00235       lapack_.GEQR2 (numRows, n, A_buf_.get(), A_buf_.lda(), tau, work, &info);
00236       if (info != 0)
00237   {
00238     std::ostringstream os;
00239     os << "TSQR::CombineDefault::factor_pair(): "
00240        << "GEQR2 failed with INFO = " << info;
00241     throw std::logic_error (os.str());
00242   }
00243 
00244       // Copy back the results.  Only read the upper triangles of the
00245       // two n by n row blocks of A_buf_ (this means we don't have to
00246       // zero out the strict lower triangles), and only touch the
00247       // upper triangles of R_top and R_bot.
00248       copy_upper_triangle (n, n, R_top, ldr_top, &A_buf_(0, 0), A_buf_.lda());
00249       copy_upper_triangle (n, n, R_bot, ldr_bot, &A_buf_(n, 0), A_buf_.lda());
00250     }
00251     
00252     void
00253     apply_pair (const ApplyType& apply_type,
00254     const Ordinal ncols_C, 
00255     const Ordinal ncols_Q, 
00256     const Scalar R_bot[], 
00257     const Ordinal ldr_bot,
00258     const Scalar tau[], 
00259     Scalar C_top[], 
00260     const Ordinal ldc_top, 
00261     Scalar C_bot[], 
00262     const Ordinal ldc_bot, 
00263     Scalar work[]) 
00264     {
00265       const Ordinal numRows = Ordinal(2) * ncols_Q;
00266 
00267       A_buf_.reshape (numRows, ncols_Q);
00268       A_buf_.fill (Scalar(0));
00269       copy_upper_triangle (ncols_Q, ncols_Q, 
00270          &A_buf_(ncols_Q, 0), A_buf_.lda(), 
00271          R_bot, ldr_bot);
00272       C_buf_.reshape (numRows, ncols_C);
00273       copy_matrix (ncols_Q, ncols_C, &C_buf_(0, 0), C_buf_.lda(), C_top, ldc_top);
00274       copy_matrix (ncols_Q, ncols_C, &C_buf_(ncols_Q, 0), C_buf_.lda(), C_bot, ldc_bot);
00275 
00276       int info = 0;
00277       lapack_.ORM2R ("Left", apply_type.toString().c_str(), 
00278          numRows, ncols_C, ncols_Q,
00279          A_buf_.get(), A_buf_.lda(), tau,
00280          C_buf_.get(), C_buf_.lda(),
00281          work, &info);
00282       if (info != 0)
00283   throw std::logic_error ("TSQR::CombineDefault: ORM2R failed");
00284 
00285       // Copy back the results.
00286       copy_matrix (ncols_Q, ncols_C, C_top, ldc_top, &C_buf_(0, 0), C_buf_.lda());
00287       copy_matrix (ncols_Q, ncols_C, C_bot, ldc_bot, &C_buf_(ncols_Q, 0), C_buf_.lda());
00288     }
00289 
00290   private:
00291     LAPACK< Ordinal, Scalar > lapack_;
00292     Matrix< Ordinal, Scalar > A_buf_;
00293     Matrix< Ordinal, Scalar > C_buf_;
00294   };
00295 
00296 
00297 } // namespace TSQR
00298 
00299 #endif // __TSQR_CombineDefault_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends