Kokkos Node API and Local Linear Algebra Kernels Version of the Day
TbbTsqr_TbbRecursiveTsqr_Def.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
00043 #define __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
00044 
00045 #include <TbbTsqr_TbbRecursiveTsqr.hpp>
00046 #include <Tsqr_ScalarTraits.hpp>
00047 #include <Tsqr_Util.hpp>
00048 
00049 // #define TBB_DEBUG 1
00050 #ifdef TBB_DEBUG
00051 #  include <iostream>
00052 using std::cerr;
00053 using std::endl;
00054 #endif // TBB_DEBUG
00055 
00058 
00059 namespace TSQR {
00060   namespace TBB {
00061 
00062     template< class LocalOrdinal, class Scalar >
00063     void
00064     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00065     explicit_Q_helper (const size_t P_first, 
00066            const size_t P_last,
00067            MatView< LocalOrdinal, Scalar >& Q_out,
00068            const bool contiguous_cache_blocks) const
00069     {
00070       if (P_first > P_last || Q_out.empty())
00071   return;
00072       else if (P_first == P_last)
00073   {
00074     CacheBlocker< LocalOrdinal, Scalar > 
00075       blocker (Q_out.nrows(), Q_out.ncols(),
00076          seq_.cache_blocking_strategy());
00077 #ifdef TBB_DEBUG
00078     cerr << "explicit_Q_helper: On P_first = " << P_first 
00079          << ", filling Q_out with zeros:" << endl
00080          << "Q_out is " << Q_out.nrows() << " x " << Q_out.ncols() 
00081          << " with leading dimension " << Q_out.lda() << endl;
00082 #endif // TBB_DEBUG
00083     // Fill my partition with zeros.
00084     blocker.fill_with_zeros (Q_out, contiguous_cache_blocks);
00085 
00086     // If our partition is the first (topmost), fill it with
00087     // the first Q_out.ncols() columns of the identity matrix.
00088     if (P_first == 0)
00089       {
00090         // Fetch the topmost cache block of my partition.  Its
00091         // leading dimension should be set correctly by
00092         // top_block().
00093         mat_view Q_out_top = 
00094     blocker.top_block (Q_out, contiguous_cache_blocks);
00095 
00096         for (LocalOrdinal j = 0; j < Q_out_top.ncols(); ++j)
00097     Q_out_top(j,j) = Scalar(1);
00098       }
00099   }
00100       else
00101   {
00102     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00103     const size_t P_mid = (P_first + P_last) / 2;
00104     split_t Q_out_split =
00105       partitioner_.split (Q_out, P_first, P_mid, P_last,
00106         contiguous_cache_blocks);
00107     explicit_Q_helper (P_first, P_mid, Q_out_split.first, 
00108            contiguous_cache_blocks);
00109     explicit_Q_helper (P_mid+1, P_last, Q_out_split.second, 
00110            contiguous_cache_blocks);
00111   }
00112     }
00113 
00114 
00115     template< class LocalOrdinal, class Scalar >
00116     MatView< LocalOrdinal, Scalar >
00117     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00118     factor_helper (const size_t P_first, 
00119        const size_t P_last,
00120        const size_t depth,
00121        MatView< LocalOrdinal, Scalar > A,
00122        std::vector< typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::SeqOutput >& seq_outputs,
00123        typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::ParOutput& par_outputs,
00124        Scalar R[],
00125        const LocalOrdinal ldr,
00126        const bool contiguous_cache_blocks) const
00127     {
00128       mat_view A_top;
00129       if (P_first > P_last || A.empty())
00130   return A;
00131       else if (P_first == P_last)
00132   {
00133     std::pair< SeqOutput, MatView< LocalOrdinal, Scalar > > results = 
00134       seq_.factor (A.nrows(), A.ncols(), A.get(), A.lda(), contiguous_cache_blocks);
00135     seq_outputs[P_first] = results.first;
00136     A_top = A;
00137   }
00138       else
00139   {
00140     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00141     const size_t P_mid = (P_first + P_last) / 2;
00142     split_t A_split = 
00143       partitioner_.split (A, P_first, P_mid, P_last,
00144         contiguous_cache_blocks);
00145     A_top = factor_helper (P_first, P_mid, depth+1, A_split.first, 
00146          seq_outputs, par_outputs, R, ldr, 
00147          contiguous_cache_blocks);
00148     mat_view A_bot = 
00149       factor_helper (P_mid+1, P_last, depth+1, A_split.second,
00150          seq_outputs, par_outputs, R, ldr, 
00151          contiguous_cache_blocks);
00152     // Combine the two results
00153     factor_pair (P_first, P_mid+1, A_top, A_bot, par_outputs, 
00154            contiguous_cache_blocks);
00155   }
00156 
00157       // If we're completely done, extract the final R factor from
00158       // the topmost partition.
00159       if (depth == 0)
00160   {
00161 #ifdef TBB_DEBUG
00162     cerr << "factor_helper: On P_first = " << P_first 
00163          << ", extracting R:" << endl
00164          << "A_top is " << A_top.nrows() << " x " << A_top.ncols() 
00165          << " with leading dimension " << A_top.lda();
00166 #endif // TBB_DEBUG
00167     seq_.extract_R (A_top.nrows(), A_top.ncols(), A_top.get(), 
00168         A_top.lda(), R, ldr, contiguous_cache_blocks);
00169   }
00170       return A_top;
00171     }
00172 
00173 
00174     template< class LocalOrdinal, class Scalar >
00175     bool
00176     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00177     apply_helper_empty (const size_t P_first,
00178       const size_t P_last,
00179       ConstMatView< LocalOrdinal, Scalar >& Q,
00180       MatView< LocalOrdinal, Scalar >& C) const
00181     {
00182       if (Q.empty())
00183   {
00184     if (! C.empty())
00185       throw std::logic_error("Q is empty but C is not!");
00186     else
00187       return true;
00188   }
00189       else if (C.empty())
00190   {
00191     if (! Q.empty())
00192       throw std::logic_error("C is empty but Q is not!");
00193     else
00194       return true;
00195   }
00196       else if (P_first > P_last)
00197   return true;
00198       else
00199   return false;
00200     }
00201 
00202 
00203     template< class LocalOrdinal, class Scalar >
00204     void
00205     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00206     build_partition_array (const size_t P_first,
00207          const size_t P_last,
00208          typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks,
00209          ConstMatView< LocalOrdinal, Scalar >& Q,
00210          MatView< LocalOrdinal, Scalar >& C,
00211          const bool contiguous_cache_blocks) const
00212     {
00213 #ifdef TBB_DEBUG
00214       cerr << "build_partition_array: [" << P_first << ", " << P_last << "]:" << endl
00215      << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 
00216      << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 
00217      << " w/ LDA = " << C.lda() << endl;
00218 #endif // TBB_DEBUG
00219 
00220       if (P_first > P_last)
00221   return;
00222       else if (P_first == P_last)
00223   {
00224     CacheBlocker< LocalOrdinal, Scalar > blocker (Q.nrows(), Q.ncols(), seq_.cache_blocking_strategy());
00225     const_mat_view Q_top = blocker.top_block (Q, contiguous_cache_blocks);
00226     mat_view C_top = blocker.top_block (C, contiguous_cache_blocks);
00227     top_blocks[P_first] = 
00228       std::make_pair (const_mat_view (Q_top.ncols(), Q_top.ncols(), Q_top.get(), Q_top.lda()), 
00229           mat_view (C_top.ncols(), C_top.ncols(), C_top.get(), C_top.lda()));
00230   }
00231       else
00232   {
00233     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00234     const size_t P_mid = (P_first + P_last) / 2;
00235     const_split_t Q_split = 
00236       partitioner_.split (Q, P_first, P_mid, P_last,
00237         contiguous_cache_blocks);
00238     split_t C_split = 
00239       partitioner_.split (C, P_first, P_mid, P_last,
00240         contiguous_cache_blocks);
00241     build_partition_array (P_first, P_mid, top_blocks, Q_split.first, 
00242          C_split.first, contiguous_cache_blocks);
00243     build_partition_array (P_mid+1, P_last, top_blocks, Q_split.second, 
00244          C_split.second, contiguous_cache_blocks);
00245   }
00246     }
00247       
00248 
00249     template< class LocalOrdinal, class Scalar >
00250     void
00251     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00252     apply_helper (const size_t P_first, 
00253       const size_t P_last,
00254       ConstMatView< LocalOrdinal, Scalar > Q,
00255       MatView< LocalOrdinal, Scalar > C,
00256       typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::array_top_blocks_t& top_blocks, 
00257       const FactorOutput& factor_output,
00258       const bool contiguous_cache_blocks) const
00259     {
00260       typedef std::pair< const_mat_view, mat_view > apply_t;
00261 #ifdef TBB_DEBUG
00262       cerr << "apply_helper: [" << P_first << ", " << P_last << "]:" << endl
00263      << "Q is " << Q.nrows() << " x " << Q.ncols() << " w/ LDA = " 
00264      << Q.lda() << endl << "C is " << C.nrows() << " x " << C.ncols() 
00265      << " w/ LDA = " << C.lda() << endl;
00266 #endif // TBB_DEBUG
00267 
00268       if (apply_helper_empty (P_first, P_last, Q, C))
00269   return;
00270       else if (P_first == P_last)
00271   {
00272     const std::vector< SeqOutput >& seq_outputs = factor_output.first;
00273     seq_.apply ("N", Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 
00274           seq_outputs[P_first], C.ncols(), C.get(), 
00275           C.lda(), contiguous_cache_blocks);
00276 #ifdef TBB_DEBUG
00277     cerr << "BOO!!!" << endl;
00278 #endif // TBB_DEBUG
00279   }
00280       else
00281   {
00282     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00283     const size_t P_mid = (P_first + P_last) / 2;
00284     const_split_t Q_split = 
00285       partitioner_.split (Q, P_first, P_mid, P_last,
00286         contiguous_cache_blocks);
00287     split_t C_split = 
00288       partitioner_.split (C, P_first, P_mid, P_last,
00289         contiguous_cache_blocks);
00290     const ParOutput& par_output = factor_output.second;
00291 
00292     apply_pair ("N", P_first, P_mid+1, top_blocks[P_mid+1].first,
00293           par_output, top_blocks[P_first].second, 
00294           top_blocks[P_mid+1].second, contiguous_cache_blocks);
00295     apply_helper (P_first, P_mid, Q_split.first, C_split.first,
00296       top_blocks, factor_output, contiguous_cache_blocks);
00297     apply_helper (P_mid+1, P_last, Q_split.second, C_split.second,
00298       top_blocks, factor_output, contiguous_cache_blocks);
00299   }
00300     }
00301 
00302 
00303     template< class LocalOrdinal, class Scalar >
00304     typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::top_blocks_t
00305     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00306     apply_transpose_helper (const std::string& op,
00307           const size_t P_first, 
00308           const size_t P_last,
00309           ConstMatView< LocalOrdinal, Scalar > Q,
00310           MatView< LocalOrdinal, Scalar > C,
00311           const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00312           const bool contiguous_cache_blocks) const
00313     {
00314       if (apply_helper_empty (P_first, P_last, Q, C))
00315   return std::make_pair (Q, C);
00316       else if (P_first == P_last)
00317   {
00318     const std::vector< SeqOutput >& seq_outputs = factor_output.first;
00319     seq_.apply (op, Q.nrows(), Q.ncols(), Q.get(), Q.lda(), 
00320           seq_outputs[P_first], C.ncols(), C.get(), 
00321           C.lda(), contiguous_cache_blocks);
00322     return std::make_pair (Q, C);
00323   }
00324       else
00325   {
00326     // Recurse on two intervals: [P_first, P_mid] and [P_mid+1, P_last]
00327     const size_t P_mid = (P_first + P_last) / 2;
00328 
00329     const_split_t Q_split = 
00330       partitioner_.split (Q, P_first, P_mid, P_last,
00331         contiguous_cache_blocks);
00332     split_t C_split = 
00333       partitioner_.split (C, P_first, P_mid, P_last,
00334         contiguous_cache_blocks);
00335     const ParOutput& par_output = factor_output.second;
00336     top_blocks_t Top = 
00337       apply_transpose_helper (op, P_first, P_mid, Q_split.first, 
00338             C_split.first, factor_output, 
00339             contiguous_cache_blocks);
00340     top_blocks_t Bottom = 
00341       apply_transpose_helper (op, P_mid+1, P_last, Q_split.second, 
00342             C_split.second, factor_output, 
00343             contiguous_cache_blocks);
00344     apply_pair (op, P_first, P_mid+1, Bottom.first,
00345           par_output, Top.second, Bottom.second,
00346           contiguous_cache_blocks);
00347     return Top;
00348   }
00349     }
00350 
00351 
00352     template< class LocalOrdinal, class Scalar >
00353     void
00354     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00355     factor_pair (const size_t P_top,
00356      const size_t P_bot,
00357      mat_view& A_top,
00358      mat_view& A_bot,
00359      std::vector< std::vector< Scalar > >& par_outputs,
00360      const bool contiguous_cache_blocks) const
00361     {
00362       if (P_top == P_bot) 
00363   {
00364     throw std::logic_error("factor_pair: should never get here!");
00365     return; // to pacify the compiler
00366   }
00367       // We only read and write the upper ncols x ncols triangle of
00368       // each block.
00369       const LocalOrdinal ncols = A_top.ncols();
00370       if (A_bot.ncols() != ncols)
00371   throw std::logic_error("A_bot.ncols() != A_top.ncols()");
00372 
00373       std::vector< Scalar >& tau = par_outputs[P_bot];
00374       std::vector< Scalar > work (ncols);
00375 
00376       TSQR::Combine< LocalOrdinal, Scalar > combine_;
00377       combine_.factor_pair (ncols, A_top.get(), A_top.lda(),
00378           A_bot.get(), A_bot.lda(), &tau[0], &work[0]);
00379     }
00380 
00381     template< class LocalOrdinal, class Scalar >
00382     void
00383     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00384     apply_pair (const std::string& trans,
00385     const size_t P_top,
00386     const size_t P_bot,
00387     ConstMatView< LocalOrdinal, Scalar >& Q_bot,
00388     const std::vector< std::vector< Scalar > >& tau_arrays,
00389     MatView< LocalOrdinal, Scalar >& C_top,
00390     MatView< LocalOrdinal, Scalar >& C_bot,
00391     const bool contiguous_cache_blocks) const
00392     {
00393       if (P_top == P_bot) 
00394   {
00395     throw std::logic_error("apply_pair: should never get here!");
00396     return; // to pacify the compiler
00397   }
00398       const std::vector< Scalar >& tau = tau_arrays[P_bot];
00399       std::vector< Scalar > work (C_top.ncols());
00400 
00401       TSQR::Combine< LocalOrdinal, Scalar > combine_;
00402       combine_.apply_pair (trans.c_str(), C_top.ncols(), Q_bot.ncols(), 
00403          Q_bot.get(), Q_bot.lda(), &tau[0],
00404          C_top.get(), C_top.lda(), 
00405          C_bot.get(), C_bot.lda(), &work[0]);
00406     }
00407 
00408     template< class LocalOrdinal, class Scalar >
00409     void 
00410     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00411     cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00412       ConstMatView< LocalOrdinal, Scalar >& A_in,
00413       const size_t P_first,
00414       const size_t P_last) const
00415     {
00416       if (P_first > P_last) 
00417   return;
00418       else if (P_first == P_last)
00419   seq_.cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 
00420         A_in.get(), A_in.lda());
00421       else
00422   {
00423     const size_t P_mid = (P_first + P_last) / 2;
00424     const_split_t A_in_split = 
00425       partitioner_.split (A_in, P_first, P_mid, P_last, false);
00426     split_t A_out_split = 
00427       partitioner_.split (A_out, P_first, P_mid, P_last, true);
00428     cache_block_helper (A_out_split.first, A_in_split.first, 
00429             P_first, P_mid);
00430     cache_block_helper (A_out_split.second, A_in_split.second, 
00431             P_mid+1, P_last);
00432   }
00433     }
00434 
00435     template< class LocalOrdinal, class Scalar >
00436     void 
00437     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00438     un_cache_block_helper (MatView< LocalOrdinal, Scalar >& A_out,
00439          const ConstMatView< LocalOrdinal, Scalar >& A_in,
00440          const size_t P_first,
00441          const size_t P_last) const
00442     {
00443       if (P_first > P_last) 
00444   return;
00445       else if (P_first == P_last)
00446   seq_.un_cache_block (A_out.nrows(), A_out.ncols(), A_out.get(), 
00447            A_out.lda(), A_in.get());
00448       else
00449   {
00450     const size_t P_mid = (P_first + P_last) / 2;
00451     const const_split_t A_in_split = 
00452       partitioner_.split (A_in, P_first, P_mid, P_last, true);
00453     split_t A_out_split = 
00454       partitioner_.split (A_out, P_first, P_mid, P_last, false);
00455     
00456     un_cache_block_helper (A_out_split.first, A_in_split.first, 
00457          P_first, P_mid);
00458     un_cache_block_helper (A_out_split.second, A_in_split.second, 
00459          P_mid+1, P_last);
00460   }
00461     }
00462 
00463     template< class LocalOrdinal, class Scalar >
00464     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00465     TbbRecursiveTsqr (const size_t num_cores,
00466           const size_t cache_size_hint)
00467       : seq_ (cache_size_hint), ncores_ (1)
00468     {
00469       if (num_cores < 1)
00470   ncores_ = 1; // default is no parallelism
00471       else
00472   ncores_ = num_cores;
00473     }
00474 
00475     template< class LocalOrdinal, class Scalar >
00476     void
00477     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00478     cache_block (const LocalOrdinal nrows,
00479      const LocalOrdinal ncols, 
00480      Scalar A_out[],
00481      const Scalar A_in[],
00482      const LocalOrdinal lda_in) const
00483     {
00484       const_mat_view A_in_view (nrows, ncols, A_in, lda_in);
00485       // Leading dimension doesn't matter, since we're going to cache block it.
00486       mat_view A_out_view (nrows, ncols, A_out, lda_in);
00487       cache_block_helper (A_out_view, A_in_view, 0, ncores()-1);
00488     }
00489 
00490     template< class LocalOrdinal, class Scalar >
00491     void
00492     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00493     un_cache_block (const LocalOrdinal nrows,
00494         const LocalOrdinal ncols,
00495         Scalar A_out[],
00496         const LocalOrdinal lda_out,       
00497         const Scalar A_in[]) const
00498     {
00499       // Leading dimension doesn't matter, since it's cache-blocked.
00500       const_mat_view A_in_view (nrows, ncols, A_in, lda_out);
00501       mat_view A_out_view (nrows, ncols, A_out, lda_out);
00502       un_cache_block_helper (A_out_view, A_in_view, 0, ncores()-1);
00503     }
00504 
00505     template< class LocalOrdinal, class Scalar >
00506     typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput
00507     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00508     factor (const LocalOrdinal nrows,
00509       const LocalOrdinal ncols, 
00510       Scalar A[],
00511       const LocalOrdinal lda,
00512       Scalar R[],
00513       const LocalOrdinal ldr,
00514       const bool contiguous_cache_blocks) const
00515     {
00516       mat_view A_view (nrows, ncols, A, lda);
00517       std::vector< SeqOutput > seq_outputs (ncores());
00518       ParOutput par_outputs (ncores(), std::vector< Scalar >(ncols));
00519       (void) factor_helper (0, ncores()-1, 0, A_view, seq_outputs, 
00520           par_outputs, R, ldr, contiguous_cache_blocks);
00521       return std::make_pair (seq_outputs, par_outputs);
00522     }
00523 
00524     template< class LocalOrdinal, class Scalar >
00525     void
00526     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00527     apply (const std::string& op,
00528      const LocalOrdinal nrows,
00529      const LocalOrdinal ncols_C,
00530      Scalar C[],
00531      const LocalOrdinal ldc,
00532      const LocalOrdinal ncols_Q,
00533      const Scalar Q[],
00534      const LocalOrdinal ldq,
00535      const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00536      const bool contiguous_cache_blocks) const
00537     {
00538       const ApplyType apply_type (op);
00539       if (apply_type == ApplyType::ConjugateTranspose && 
00540     ScalarTraits< Scalar >::is_complex)
00541   throw std::logic_error("Applying Q^H for complex scalar types "
00542              "not yet implemented");
00543 
00544       const_mat_view Q_view (nrows, ncols_Q, Q, ldq);
00545       mat_view C_view (nrows, ncols_C, C, ldc);
00546       if (! apply_type.transposed())
00547   {
00548     array_top_blocks_t top_blocks (ncores());
00549     build_partition_array (0, ncores()-1, top_blocks, Q_view, 
00550          C_view, contiguous_cache_blocks);
00551     apply_helper (0, ncores()-1, Q_view, C_view, top_blocks, 
00552       factor_output, contiguous_cache_blocks);
00553   }
00554       else
00555   apply_transpose_helper (op, 0, ncores()-1, Q_view, C_view, 
00556         factor_output, contiguous_cache_blocks);
00557     }
00558 
00559 
00560     template< class LocalOrdinal, class Scalar >
00561     void
00562     TbbRecursiveTsqr< LocalOrdinal, Scalar >::
00563     explicit_Q (const LocalOrdinal nrows,
00564     const LocalOrdinal ncols_Q_in,
00565     const Scalar Q_in[],
00566     const LocalOrdinal ldq_in,
00567     const LocalOrdinal ncols_Q_out,
00568     Scalar Q_out[],
00569     const LocalOrdinal ldq_out,
00570     const typename TbbRecursiveTsqr< LocalOrdinal, Scalar >::FactorOutput& factor_output,
00571     const bool contiguous_cache_blocks) const
00572     {
00573       if (ncols_Q_out != ncols_Q_in)
00574   throw std::logic_error("FIXME Currently, explicit_Q() only works for ncols_Q_out == ncols_Q_in");
00575 
00576       const_mat_view Q_in_view (nrows, ncols_Q_in, Q_in, ldq_in);
00577       mat_view Q_out_view (nrows, ncols_Q_out, Q_out, ldq_out);
00578 
00579       explicit_Q_helper (0, ncores()-1, Q_out_view, contiguous_cache_blocks);
00580       apply ("N", nrows, ncols_Q_out, Q_out, ldq_out, ncols_Q_in, 
00581        Q_in, ldq_in, factor_output, contiguous_cache_blocks);
00582     }
00583 
00584   } // namespace TBB
00585 } // namespace TSQR
00586 
00587 
00588 #endif // __TSQR_TBB_TbbRecursiveTsqr_Def_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends