Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_TbbRecursiveTsqr_Def.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2009) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
00030 #define __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
00031 
00032 #include <TbbTsqr_TbbRecursiveTsqr.hpp>
00033 #include <Tsqr_ScalarTraits.hpp>
00034 #include <Tsqr_Util.hpp>
00035 
00036 // #define TBB_DEBUG 1
00037 #ifdef TBB_DEBUG
00038 #  include <iostream>
00039 using std::cerr;
00040 using std::endl;
00041 #endif // TBB_DEBUG
00042 
00045 
00046 namespace TSQR {
00047   namespace TBB {
00048 
00049     template< class LocalOrdinal, class Scalar >
00050     void
00051     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00052     explicit_Q_helper (const size_t P_first, 
00053            const size_t P_last,
00054            MatView< LocalOrdinal, Scalar >& Q_out,
00055            const bool contiguous_cache_blocks) const
00056     {
00057       if (P_first > P_last || Q_out.empty())
00058   return;
00059       else if (P_first == P_last)
00060   {
00061     CacheBlocker< LocalOrdinal, Scalar > 
00062       blocker (Q_out.nrows(), Q_out.ncols(),
00063          seq_.cache_blocking_strategy());
00064 #ifdef TBB_DEBUG
00065     cerr << "explicit_Q_helper: On P_first = " << P_first 
00066          << ", filling Q_out with zeros:" << endl
00067          << "Q_out is " << Q_out.nrows() << " x " << Q_out.ncols() 
00068          << " with leading dimension " << Q_out.lda() << endl;
00069 #endif // TBB_DEBUG
00070     // Fill my partition with zeros.
00071     blocker.fill_with_zeros (Q_out, contiguous_cache_blocks);
00072 
00073     // If our partition is the first (topmost), fill it with
00074     // the first Q_out.ncols() columns of the identity matrix.
00075     if (P_first == 0)
00076       {
00077         // Fetch the topmost cache block of my partition.  Its
00078         // leading dimension should be set correctly by
00079         // top_block().
00080         mat_view Q_out_top = 
00081     blocker.top_block (Q_out, contiguous_cache_blocks);
00082 
00083         for (LocalOrdinal j = 0; j < Q_out_top.ncols(); ++j)
00084     Q_out_top(j,j) = Scalar(1);
00085       }
00086   }
00087       else
00088   {
00089     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00090     const size_t P_mid = (P_first + P_last) / 2;
00091     split_t Q_out_split =
00092       partitioner_.split (Q_out, P_first, P_mid, P_last,
00093         contiguous_cache_blocks);
00094     explicit_Q_helper (P_first, P_mid, Q_out_split.first, 
00095            contiguous_cache_blocks);
00096     explicit_Q_helper (P_mid+1, P_last, Q_out_split.second, 
00097            contiguous_cache_blocks);
00098   }
00099     }
00100 
00101 
00102     template< class LocalOrdinal, class Scalar >
00103     MatView< LocalOrdinal, Scalar >
00104     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00105     factor_helper (const size_t P_first, 
00106        const size_t P_last,
00107        const size_t depth,
00108        MatView< LocalOrdinal, Scalar > A,
00109        std::vector< typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::SeqOutput >& seq_outputs,
00110        typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::ParOutput& par_outputs,
00111        Scalar R[],
00112        const LocalOrdinal ldr,
00113        const bool contiguous_cache_blocks) const
00114     {
00115       mat_view A_top;
00116       if (P_first > P_last || A.empty())
00117   return A;
00118       else if (P_first == P_last)
00119   {
00120     std::pair< SeqOutput, MatView< LocalOrdinal, Scalar > > results = 
00121       seq_.factor (A.nrows(), A.ncols(), A.get(), A.lda(), contiguous_cache_blocks);
00122     seq_outputs[P_first] = results.first;
00123     A_top = A;
00124   }
00125       else
00126   {
00127     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00128     const size_t P_mid = (P_first + P_last) / 2;
00129     split_t A_split = 
00130       partitioner_.split (A, P_first, P_mid, P_last,
00131         contiguous_cache_blocks);
00132     A_top = factor_helper (P_first, P_mid, depth+1, A_split.first, 
00133          seq_outputs, par_outputs, R, ldr, 
00134          contiguous_cache_blocks);
00135     mat_view A_bot = 
00136       factor_helper (P_mid+1, P_last, depth+1, A_split.second,
00137          seq_outputs, par_outputs, R, ldr, 
00138          contiguous_cache_blocks);
00139     // Combine the two results
00140     factor_pair (P_first, P_mid+1, A_top, A_bot, par_outputs, 
00141            contiguous_cache_blocks);
00142   }
00143 
00144       // If we're completely done, extract the final R factor from
00145       // the topmost partition.
00146       if (depth == 0)
00147   {
00148 #ifdef TBB_DEBUG
00149     cerr << "factor_helper: On P_first = " << P_first 
00150          << ", extracting R:" << endl
00151          << "A_top is " << A_top.nrows() << " x " << A_top.ncols() 
00152          << " with leading dimension " << A_top.lda();
00153 #endif // TBB_DEBUG
00154     seq_.extract_R (A_top.nrows(), A_top.ncols(), A_top.get(), 
00155         A_top.lda(), R, ldr, contiguous_cache_blocks);
00156   }
00157       return A_top;
00158     }
00159 
00160 
00161     template< class LocalOrdinal, class Scalar >
00162     bool
00163     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00164     apply_helper_empty (const size_t P_first,
00165       const size_t P_last,
00166       ConstMatView< LocalOrdinal, Scalar >& Q,
00167       MatView< LocalOrdinal, Scalar >& C) const
00168     {
00169       if (Q.empty())
00170   {
00171     if (! C.empty())
00172       throw std::logic_error("Q is empty but C is not!");
00173     else
00174       return true;
00175   }
00176       else if (C.empty())
00177   {
00178     if (! Q.empty())
00179       throw std::logic_error("C is empty but Q is not!");
00180     else
00181       return true;
00182   }
00183       else if (P_first > P_last)
00184   return true;
00185       else
00186   return false;
00187     }
00188 
00189 
00190     template< class LocalOrdinal, class Scalar >
00191     void
00192     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00193     build_partition_array (const size_t P_first,
00194          const size_t P_last,
00195          typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks,
00196          ConstMatView< LocalOrdinal, Scalar >& Q,
00197          MatView< LocalOrdinal, Scalar >& C,
00198          const bool contiguous_cache_blocks) const
00199     {
00200 #ifdef TBB_DEBUG
00201       cerr << "build_partition_array: [" << P_first << ", " << P_last << "]:" << endl
00202      << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 
00203      << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 
00204      << " w/ LDA = " << C.lda() << endl;
00205 #endif // TBB_DEBUG
00206 
00207       if (P_first > P_last)
00208   return;
00209       else if (P_first == P_last)
00210   {
00211     CacheBlocker< LocalOrdinal, Scalar > blocker (Q.nrows(), Q.ncols(), seq_.cache_blocking_strategy());
00212     const_mat_view Q_top = blocker.top_block (Q, contiguous_cache_blocks);
00213     mat_view C_top = blocker.top_block (C, contiguous_cache_blocks);
00214     top_blocks[P_first] = 
00215       std::make_pair (const_mat_view (Q_top.ncols(), Q_top.ncols(), Q_top.get(), Q_top.lda()), 
00216           mat_view (C_top.ncols(), C_top.ncols(), C_top.get(), C_top.lda()));
00217   }
00218       else
00219   {
00220     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00221     const size_t P_mid = (P_first + P_last) / 2;
00222     const_split_t Q_split = 
00223       partitioner_.split (Q, P_first, P_mid, P_last,
00224         contiguous_cache_blocks);
00225     split_t C_split = 
00226       partitioner_.split (C, P_first, P_mid, P_last,
00227         contiguous_cache_blocks);
00228     build_partition_array (P_first, P_mid, top_blocks, Q_split.first, 
00229          C_split.first, contiguous_cache_blocks);
00230     build_partition_array (P_mid+1, P_last, top_blocks, Q_split.second, 
00231          C_split.second, contiguous_cache_blocks);
00232   }
00233     }
00234       
00235 
00236     template< class LocalOrdinal, class Scalar >
00237     void
00238     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00239     apply_helper (const size_t P_first, 
00240       const size_t P_last,
00241       ConstMatView< LocalOrdinal, Scalar > Q,
00242       MatView< LocalOrdinal, Scalar > C,
00243       typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks, 
00244       const FactorOutput& factor_output,
00245       const bool contiguous_cache_blocks) const
00246     {
00247       typedef std::pair< const_mat_view, mat_view > apply_t;
00248 #ifdef TBB_DEBUG
00249       cerr << "apply_helper: [" << P_first << ", " << P_last << "]:" << endl
00250      << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 
00251      << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 
00252      << " w/ LDA = " << C.lda() << endl;
00253 #endif // TBB_DEBUG
00254 
00255       if (apply_helper_empty (P_first, P_last, Q, C))
00256   return;
00257       else if (P_first == P_last)
00258   {
00259     const std::vector< SeqOutput >& seq_outputs = factor_output.first;
00260     seq_.apply ("N", Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 
00261           seq_outputs[P_first], C.ncols(), C.get(), 
00262           C.lda(), contiguous_cache_blocks);
00263 #ifdef TBB_DEBUG
00264     cerr << "BOO!!!" << endl;
00265 #endif // TBB_DEBUG
00266   }
00267       else
00268   {
00269     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00270     const size_t P_mid = (P_first + P_last) / 2;
00271     const_split_t Q_split = 
00272       partitioner_.split (Q, P_first, P_mid, P_last,
00273         contiguous_cache_blocks);
00274     split_t C_split = 
00275       partitioner_.split (C, P_first, P_mid, P_last,
00276         contiguous_cache_blocks);
00277     const ParOutput& par_output = factor_output.second;
00278 
00279     apply_pair ("N", P_first, P_mid+1, top_blocks[P_mid+1].first,
00280           par_output, top_blocks[P_first].second, 
00281           top_blocks[P_mid+1].second, contiguous_cache_blocks);
00282     apply_helper (P_first, P_mid, Q_split.first, C_split.first,
00283       top_blocks, factor_output, contiguous_cache_blocks);
00284     apply_helper (P_mid+1, P_last, Q_split.second, C_split.second,
00285       top_blocks, factor_output, contiguous_cache_blocks);
00286   }
00287     }
00288 
00289 
00290     template< class LocalOrdinal, class Scalar >
00291     typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::top_blocks_t
00292     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00293     apply_transpose_helper (const std::string& op,
00294           const size_t P_first, 
00295           const size_t P_last,
00296           ConstMatView< LocalOrdinal, Scalar > Q,
00297           MatView< LocalOrdinal, Scalar > C,
00298           const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00299           const bool contiguous_cache_blocks) const
00300     {
00301       if (apply_helper_empty (P_first, P_last, Q, C))
00302   return std::make_pair (Q, C);
00303       else if (P_first == P_last)
00304   {
00305     const std::vector< SeqOutput >& seq_outputs = factor_output.first;
00306     seq_.apply (op, Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 
00307           seq_outputs[P_first], C.ncols(), C.get(), 
00308           C.lda(), contiguous_cache_blocks);
00309     return std::make_pair (Q, C);
00310   }
00311       else
00312   {
00313     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00314     const size_t P_mid = (P_first + P_last) / 2;
00315 
00316     const_split_t Q_split = 
00317       partitioner_.split (Q, P_first, P_mid, P_last,
00318         contiguous_cache_blocks);
00319     split_t C_split = 
00320       partitioner_.split (C, P_first, P_mid, P_last,
00321         contiguous_cache_blocks);
00322     const ParOutput& par_output = factor_output.second;
00323     top_blocks_t Top = 
00324       apply_transpose_helper (op, P_first, P_mid, Q_split.first, 
00325             C_split.first, factor_output, 
00326             contiguous_cache_blocks);
00327     top_blocks_t Bottom = 
00328       apply_transpose_helper (op, P_mid+1, P_last, Q_split.second, 
00329             C_split.second, factor_output, 
00330             contiguous_cache_blocks);
00331     apply_pair (op, P_first, P_mid+1, Bottom.first,
00332           par_output, Top.second, Bottom.second,
00333           contiguous_cache_blocks);
00334     return Top;
00335   }
00336     }
00337 
00338 
00339     template< class LocalOrdinal, class Scalar >
00340     void
00341     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00342     factor_pair (const size_t P_top,
00343      const size_t P_bot,
00344      mat_view& A_top,
00345      mat_view& A_bot,
00346      std::vector< std::vector< Scalar > >& par_outputs,
00347      const bool contiguous_cache_blocks) const
00348     {
00349       if (P_top == P_bot) 
00350   {
00351     throw std::logic_error("factor_pair: should never get here!");
00352     return; // to pacify the compiler
00353   }
00354       // We only read and write the upper ncols x ncols triangle of
00355       // each block.
00356       const LocalOrdinal ncols = A_top.ncols();
00357       if (A_bot.ncols() != ncols)
00358   throw std::logic_error("A_bot.ncols() != A_top.ncols()");
00359 
00360       std::vector< Scalar >& tau = par_outputs[P_bot];
00361       std::vector< Scalar > work (ncols);
00362 
00363       TSQR::Combine< LocalOrdinal, Scalar > combine_;
00364       combine_.factor_pair (ncols, A_top.get(), A_top.lda(),
00365           A_bot.get(), A_bot.lda(), &tau[0], &work[0]);
00366     }
00367 
00368     template< class LocalOrdinal, class Scalar >
00369     void
00370     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00371     apply_pair (const std::string& trans,
00372     const size_t P_top,
00373     const size_t P_bot,
00374     ConstMatView< LocalOrdinal, Scalar >& Q_bot,
00375     const std::vector< std::vector< Scalar > >& tau_arrays,
00376     MatView< LocalOrdinal, Scalar >& C_top,
00377     MatView< LocalOrdinal, Scalar >& C_bot,
00378     const bool contiguous_cache_blocks) const
00379     {
00380       if (P_top == P_bot) 
00381   {
00382     throw std::logic_error("apply_pair: should never get here!");
00383     return; // to pacify the compiler
00384   }
00385       const std::vector< Scalar >& tau = tau_arrays[P_bot];
00386       std::vector< Scalar > work (C_top.ncols());
00387 
00388       TSQR::Combine< LocalOrdinal, Scalar > combine_;
00389       combine_.apply_pair (trans.c_str(), C_top.ncols(), Q_bot.ncols(), 
00390          Q_bot.get(), Q_bot.lda(), &tau[0],
00391          C_top.get(), C_top.lda(), 
00392          C_bot.get(), C_bot.lda(), &work[0]);
00393     }
00394 
00395     template< class LocalOrdinal, class Scalar >
00396     void 
00397     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00398     cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00399       ConstMatView< LocalOrdinal, Scalar >& A_in,
00400       const size_t P_first,
00401       const size_t P_last) const
00402     {
00403       if (P_first > P_last) 
00404   return;
00405       else if (P_first == P_last)
00406   seq_.cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 
00407         A_in.get(), A_in.lda());
00408       else
00409   {
00410     const size_t P_mid = (P_first + P_last) / 2;
00411     const_split_t A_in_split = 
00412       partitioner_.split (A_in, P_first, P_mid, P_last, false);
00413     split_t A_out_split = 
00414       partitioner_.split (A_out, P_first, P_mid, P_last, true);
00415     cache_block_helper (A_out_split.first, A_in_split.first, 
00416             P_first, P_mid);
00417     cache_block_helper (A_out_split.second, A_in_split.second, 
00418             P_mid+1, P_last);
00419   }
00420     }
00421 
00422     template< class LocalOrdinal, class Scalar >
00423     void 
00424     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00425     un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00426          const ConstMatView< LocalOrdinal, Scalar >& A_in,
00427          const size_t P_first,
00428          const size_t P_last) const
00429     {
00430       if (P_first > P_last) 
00431   return;
00432       else if (P_first == P_last)
00433   seq_.un_cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 
00434            A_out.lda(), A_in.get());
00435       else
00436   {
00437     const size_t P_mid = (P_first + P_last) / 2;
00438     const const_split_t A_in_split = 
00439       partitioner_.split (A_in, P_first, P_mid, P_last, true);
00440     split_t A_out_split = 
00441       partitioner_.split (A_out, P_first, P_mid, P_last, false);
00442     
00443     un_cache_block_helper (A_out_split.first, A_in_split.first, 
00444          P_first, P_mid);
00445     un_cache_block_helper (A_out_split.second, A_in_split.second, 
00446          P_mid+1, P_last);
00447   }
00448     }
00449 
00450     template< class LocalOrdinal, class Scalar >
00451     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00452     TbbRecursiveTsqr (const size_t num_cores,
00453           const size_t cache_size_hint)
00454       : seq_ (cache_size_hint), ncores_ (1)
00455     {
00456       if (num_cores < 1)
00457   ncores_ = 1; // default is no parallelism
00458       else
00459   ncores_ = num_cores;
00460     }
00461 
00462     template< class LocalOrdinal, class Scalar >
00463     void
00464     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00465     cache_block (const LocalOrdinal nrows,
00466      const LocalOrdinal ncols, 
00467      Scalar A_out[],
00468      const Scalar A_in[],
00469      const LocalOrdinal lda_in) const
00470     {
00471       const_mat_view A_in_view (nrows, ncols, A_in, lda_in);
00472       // Leading dimension doesn't matter, since we're going to cache block it.
00473       mat_view A_out_view (nrows, ncols, A_out, lda_in);
00474       cache_block_helper (A_out_view, A_in_view, 0, ncores()-1);
00475     }
00476 
00477     template< class LocalOrdinal, class Scalar >
00478     void
00479     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00480     un_cache_block (const LocalOrdinal nrows,
00481         const LocalOrdinal ncols,
00482         Scalar A_out[],
00483         const LocalOrdinal lda_out,       
00484         const Scalar A_in[]) const
00485     {
00486       // Leading dimension doesn't matter, since it's cache-blocked.
00487       const_mat_view A_in_view (nrows, ncols, A_in, lda_out);
00488       mat_view A_out_view (nrows, ncols, A_out, lda_out);
00489       un_cache_block_helper (A_out_view, A_in_view, 0, ncores()-1);
00490     }
00491 
00492     template< class LocalOrdinal, class Scalar >
00493     typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput
00494     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00495     factor (const LocalOrdinal nrows,
00496       const LocalOrdinal ncols, 
00497       Scalar A[],
00498       const LocalOrdinal lda,
00499       Scalar R[],
00500       const LocalOrdinal ldr,
00501       const bool contiguous_cache_blocks) const
00502     {
00503       mat_view A_view (nrows, ncols, A, lda);
00504       std::vector< SeqOutput > seq_outputs (ncores());
00505       ParOutput par_outputs (ncores(), std::vector< Scalar >(ncols));
00506       (void) factor_helper (0, ncores()-1, 0, A_view, seq_outputs, 
00507           par_outputs, R, ldr, contiguous_cache_blocks);
00508       return std::make_pair (seq_outputs, par_outputs);
00509     }
00510 
00511     template< class LocalOrdinal, class Scalar >
00512     void
00513     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00514     apply (const std::string& op,
00515      const LocalOrdinal nrows,
00516      const LocalOrdinal ncols_C,
00517      Scalar C[],
00518      const LocalOrdinal ldc,
00519      const LocalOrdinal ncols_Q,
00520      const Scalar Q[],
00521      const LocalOrdinal ldq,
00522      const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00523      const bool contiguous_cache_blocks) const
00524     {
00525       const ApplyType apply_type (op);
00526       if (apply_type == ApplyType::ConjugateTranspose && 
00527     ScalarTraits< Scalar >::is_complex)
00528   throw std::logic_error("Applying Q^H for complex scalar types "
00529              "not yet implemented");
00530 
00531       const_mat_view Q_view (nrows, ncols_Q, Q, ldq);
00532       mat_view C_view (nrows, ncols_C, C, ldc);
00533       if (! apply_type.transposed())
00534   {
00535     array_top_blocks_t top_blocks (ncores());
00536     build_partition_array (0, ncores()-1, top_blocks, Q_view, 
00537          C_view, contiguous_cache_blocks);
00538     apply_helper (0, ncores()-1, Q_view, C_view, top_blocks, 
00539       factor_output, contiguous_cache_blocks);
00540   }
00541       else
00542   apply_transpose_helper (op, 0, ncores()-1, Q_view, C_view, 
00543         factor_output, contiguous_cache_blocks);
00544     }
00545 
00546 
00547     template< class LocalOrdinal, class Scalar >
00548     void
00549     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00550     explicit_Q (const LocalOrdinal nrows,
00551     const LocalOrdinal ncols_Q_in,
00552     const Scalar Q_in[],
00553     const LocalOrdinal ldq_in,
00554     const LocalOrdinal ncols_Q_out,
00555     Scalar Q_out[],
00556     const LocalOrdinal ldq_out,
00557     const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00558     const bool contiguous_cache_blocks) const
00559     {
00560       if (ncols_Q_out != ncols_Q_in)
00561   throw std::logic_error("FIXME Currently, explicit_Q() only works for ncols_Q_out == ncols_Q_in");
00562 
00563       const_mat_view Q_in_view (nrows, ncols_Q_in, Q_in, ldq_in);
00564       mat_view Q_out_view (nrows, ncols_Q_out, Q_out, ldq_out);
00565 
00566       explicit_Q_helper (0, ncores()-1, Q_out_view, contiguous_cache_blocks);
00567       apply ("N", nrows, ncols_Q_out, Q_out, ldq_out, ncols_Q_in, 
00568        Q_in, ldq_in, factor_output, contiguous_cache_blocks);
00569     }
00570 
00571   } // namespace TBB
00572 } // namespace TSQR
00573 
00574 
00575 #endif // __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends