Anasazi Version of the Day
Tsqr_CacheBlocker.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //                 Anasazi: Block Eigensolvers Package
00005 //                 Copyright (2010) Sandia Corporation
00006 //
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 //
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 //
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef __TSQR_CacheBlocker_hpp
00030 #define __TSQR_CacheBlocker_hpp
00031 
00032 #include <Tsqr_CacheBlockingStrategy.hpp>
00033 #include <Tsqr_MatView.hpp>
00034 #include <Tsqr_Util.hpp>
00035 
00036 // #include <iostream>
00037 #include <sstream>
00038 #include <stdexcept>
00039 
00040 // #define CACHE_BLOCKER_DEBUG 1
00041 // #ifdef CACHE_BLOCKER_DEBUG
00042 // #  undef CACHE_BLOCKER_DEBUG
00043 // #endif // CACHE_BLOCKER_DEBUG
00044 #ifdef CACHE_BLOCKER_DEBUG
00045 #  include <iostream>
00046 using std::cerr;
00047 using std::endl;
00048 #endif // CACHE_BLOCKER_DEBUG
00049 
00052 
00053 namespace TSQR {
00054 
00063   template< class Ordinal, class Scalar > // class CBS=CacheBlockingStrategy< Ordinal, Scalar > 
00064   class CacheBlocker {
00065   private:
00066     typedef MatView< Ordinal, Scalar > mat_view;
00067     typedef ConstMatView< Ordinal, Scalar > const_mat_view;
00068 
00069     void
00070     validate () 
00071     {
00072       if (nrows_cache_block_ < ncols_)
00073   {
00074     std::ostringstream os;
00075     os << "Cache block is too small: only " << nrows_cache_block_
00076        << " rows fit, but need at least as many rows as columns (= "
00077        << ncols_ << ")";
00078     throw std::logic_error (os.str());
00079   }
00080     }
00081 
00082   public:
00090     CacheBlocker (const Ordinal num_rows,
00091       const Ordinal num_cols,
00092       const CacheBlockingStrategy< Ordinal, Scalar >& strategy) :
00093       nrows_ (num_rows), 
00094       ncols_ (num_cols), 
00095       strategy_ (strategy)
00096     {
00097 #ifdef CACHE_BLOCKER_DEBUG
00098       cerr << "CacheBlocker:" << endl
00099      << "# rows = " << nrows() << endl
00100      << "# cols = " << ncols() << endl
00101      << "Cache block size (bytes) = " << strategy_.cache_block_size()
00102      << endl << endl;
00103 #endif
00104       const Ordinal temp_nrows_cache_block = 
00105   strategy_.cache_block_num_rows (ncols());
00106 #ifdef CACHE_BLOCKER_DEBUG
00107       cerr << "Strategy says: # rows per cache block = "
00108      << temp_nrows_cache_block << endl;
00109 #endif
00110       nrows_cache_block_ = temp_nrows_cache_block;
00111       validate ();
00112     }
00113 
00115     CacheBlocker (const CacheBlocker& rhs) :
00116       nrows_ (rhs.nrows()), 
00117       ncols_ (rhs.ncols()), 
00118       strategy_ (rhs.strategy_)
00119     {}
00120 
00122     CacheBlocker& operator= (const CacheBlocker& rhs) {
00123       nrows_ = rhs.nrows();
00124       ncols_ = rhs.ncols();
00125       strategy_ = rhs.strategy_;
00126       return *this;
00127     }
00128 
00130     size_t cache_block_size () const { return strategy_.cache_block_size(); }
00131 
00133     Ordinal nrows () const { return nrows_; }
00134 
00136     Ordinal ncols () const { return ncols_; }
00137 
00145     template< class MatrixViewType >
00146     MatrixViewType
00147     split_top_block (MatrixViewType& A, const bool contiguous_cache_blocks) const
00148     {
00149       typedef typename MatrixViewType::ordinal_type ordinal_type;
00150       const ordinal_type nrows_top = 
00151   strategy_.top_block_split_nrows (A.nrows(), ncols(), 
00152            nrows_cache_block());
00153       // split_top() modifies A
00154       return A.split_top (nrows_top, contiguous_cache_blocks);
00155     }
00156 
00159     template< class MatrixViewType >
00160     MatrixViewType
00161     top_block (const MatrixViewType& A, const bool contiguous_cache_blocks) const
00162     {
00163       typedef typename MatrixViewType::ordinal_type ordinal_type;
00164       const ordinal_type nrows_top = 
00165   strategy_.top_block_split_nrows (A.nrows(), ncols(), 
00166            nrows_cache_block());
00167       MatrixViewType A_copy (A);
00168       return A_copy.split_top (nrows_top, contiguous_cache_blocks);
00169     }
00170 
00171     template< class MatrixViewType >
00172     MatrixViewType
00173     split_bottom_block (MatrixViewType& A, const bool contiguous_cache_blocks) const
00174     {
00175       typedef typename MatrixViewType::ordinal_type ordinal_type;
00176       const ordinal_type nrows_bottom = 
00177   strategy_.bottom_block_split_nrows (A.nrows(), ncols(), 
00178               nrows_cache_block());
00179       // split_bottom() modifies A
00180       return A.split_bottom (nrows_bottom, contiguous_cache_blocks);
00181     }
00182 
00191     template< class MatrixViewType >
00192     void
00193     fill_with_zeros (MatrixViewType A,
00194          const bool contiguous_cache_blocks) const
00195     {
00196       // Note: if the cache blocks are stored contiguously, A.lda()
00197       // won't be the correct leading dimension of A, but it won't
00198       // matter: we only ever operate on A_cur here, and A_cur's
00199       // leading dimension is set correctly by split_top_block().
00200       while (! A.empty())
00201   {
00202     // This call modifies A, but that's OK since we passed the
00203     // input view by copy, not by reference.
00204     MatrixViewType A_cur = split_top_block (A, contiguous_cache_blocks);
00205     A_cur.fill (Scalar(0));
00206   }
00207     }
00208 
00226     void
00227     cache_block (const Ordinal num_rows,
00228      const Ordinal num_cols,
00229      Scalar A_out[],
00230      const Scalar A_in[],
00231      const Ordinal lda_in) const
00232     {
00233       // We say "*_rest" because it points to the remaining part of
00234       // the matrix left to cache block; at the beginning, the
00235       // "remaining" part is the whole matrix, but that will change as
00236       // the algorithm progresses.
00237       const_mat_view A_in_rest (num_rows, num_cols, A_in, lda_in);
00238       // Leading dimension doesn't matter since A_out will be cache blocked.
00239       mat_view A_out_rest (num_rows, num_cols, A_out, lda_in);
00240 
00241       while (! A_in_rest.empty())
00242   {
00243     if (A_out_rest.empty())
00244       throw std::logic_error("A_out_rest is empty, but A_in_rest is not");
00245 
00246     // This call modifies A_in_rest.
00247     const_mat_view A_in_cur = split_top_block (A_in_rest, false);
00248 
00249     // This call modifies A_out_rest.
00250     mat_view A_out_cur = split_top_block (A_out_rest, true);
00251 
00252     copy_matrix (A_in_cur.nrows(), num_cols, A_out_cur.get(), 
00253            A_out_cur.lda(), A_in_cur.get(), A_in_cur.lda());
00254   }
00255     }
00256 
00259     void
00260     un_cache_block (const Ordinal num_rows,
00261         const Ordinal num_cols,
00262         Scalar A_out[],
00263         const Ordinal lda_out,        
00264         const Scalar A_in[]) const
00265     {
00266       // We say "*_rest" because it points to the remaining part of
00267       // the matrix left to cache block; at the beginning, the
00268       // "remaining" part is the whole matrix, but that will change as
00269       // the algorithm progresses.
00270       //
00271       // Leading dimension doesn't matter since A_in is cache blocked.
00272       const_mat_view A_in_rest (num_rows, num_cols, A_in, lda_out);
00273       mat_view A_out_rest (num_rows, num_cols, A_out, lda_out);
00274 
00275       while (! A_in_rest.empty())
00276   {
00277     if (A_out_rest.empty())
00278       throw std::logic_error("A_out_rest is empty, but A_in_rest is not");
00279 
00280     // This call modifies A_in_rest.
00281     const_mat_view A_in_cur = split_top_block (A_in_rest, true);
00282 
00283     // This call modifies A_out_rest.
00284     mat_view A_out_cur = split_top_block (A_out_rest, false);
00285 
00286     copy_matrix (A_in_cur.nrows(), num_cols, A_out_cur.get(), 
00287            A_out_cur.lda(), A_in_cur.get(), A_in_cur.lda());
00288   }
00289     }
00290 
00291 
00292     void
00293     fill_with_zeros (const Ordinal num_rows,
00294          const Ordinal num_cols,
00295          Scalar A[],
00296          const Ordinal lda, 
00297          const bool contiguous_cache_blocks)
00298     {
00299       // We say "A_rest" because it points to the remaining part of
00300       // the matrix left to process; at the beginning, the "remaining"
00301       // part is the whole matrix, but that will change as the
00302       // algorithm progresses.
00303       //
00304       // Note: if the cache blocks are stored contiguously, lda won't
00305       // be the correct leading dimension of A, but it won't matter:
00306       // we only ever operate on A_cur here, and A_cur's leading
00307       // dimension is set correctly by A_rest.split_top().
00308       mat_view A_rest (num_rows, num_cols, A, lda);
00309 
00310       while (! A_rest.empty())
00311   {
00312     // This call modifies A_rest.
00313     mat_view A_cur = split_top_block (A_rest, contiguous_cache_blocks);
00314     A_cur.fill (Scalar(0));
00315   }
00316     }
00317 
00318 
00319   private:
00320     Ordinal nrows_;
00321     Ordinal ncols_;
00322     CacheBlockingStrategy< Ordinal, Scalar > strategy_;
00323     Ordinal nrows_cache_block_;
00324 
00330     size_t nrows_cache_block () const { return nrows_cache_block_; }
00331   };
00332 
00333 } // namespace TSQR
00334 
00335 
00336 #endif // __TSQR_CacheBlocker_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends