AbstractLinAlgPack_COOMatrixTmplOpDef.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
00005 //                  Copyright (2003) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef COO_MATRIX_TMPL_OP_DEF_H
00030 #define COO_MATRIX_TMPL_OP_DEF_H
00031 
00032 #include "AbstractLinAlgPack_COOMatrixTmplOpDecl.hpp"
00033 
00034 #include "DenseLinAlgPack_DMatrixClass.hpp"
00035 #include "DenseLinAlgPack_DVectorOp.hpp"
00036 #include "DenseLinAlgPack_AssertOp.hpp"
00037 
00038 namespace AbstractLinAlgPack {
00039 
00040 using BLAS_Cpp::trans_not;
00041 
00042 using DenseLinAlgPack::Mp_M_assert_sizes;
00043 using DenseLinAlgPack::Vp_MtV_assert_sizes;
00044 using DenseLinAlgPack::Mp_MtM_assert_sizes;
00045 
00046 // gms_lhs += alpha * coom_rhs (time = O(coom_rhs.nz()), space = O(1))
00047 template<class T_COOM>
00048 void Mp_StCOOM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs
00049   , BLAS_Cpp::Transp trans_rhs)
00050 {
00051   Mp_M_assert_sizes(    gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
00052             , coom_rhs.rows(), coom_rhs.cols(), trans_rhs     );
00053   typename T_COOM::difference_type
00054     i_o = coom_rhs.row_offset(),
00055     j_o = coom_rhs.col_offset();
00056   if(trans_rhs == BLAS_Cpp::no_trans)
00057     for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr)
00058       (*gms_lhs)(itr->row_i()+i_o,itr->col_j()+j_o) += alpha * itr->value();
00059   else
00060     for(typename T_COOM::const_iterator itr = coom_rhs.begin(); itr != coom_rhs.end(); ++itr)
00061       (*gms_lhs)(itr->col_j()+j_o,itr->row_i()+i_o) += alpha * itr->value();
00062 }
00063 
00064 // vs_lhs += alpha * coom_rhs1 * vs_rhs2 (BLAS xGEMV) (time = O(coom_rhs.nz()), space = O(1))
00065 template<class T_COOM>
00066 void Vp_StCOOMtV(DVectorSlice* vs_lhs, value_type alpha, const T_COOM& coom_rhs1
00067   , BLAS_Cpp::Transp trans_rhs1, const DVectorSlice& vs_rhs2)
00068 {
00069   Vp_MtV_assert_sizes( vs_lhs->dim(), coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1, vs_rhs2.dim() );
00070   typename T_COOM::difference_type
00071     i_o = coom_rhs1.row_offset(),
00072     j_o = coom_rhs1.col_offset();
00073   if(trans_rhs1 == BLAS_Cpp::no_trans)
00074     for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr)
00075       (*vs_lhs)(itr->row_i()+i_o) += alpha * itr->value() * vs_rhs2(itr->col_j()+j_o);
00076   else
00077     for(typename T_COOM::const_iterator itr = coom_rhs1.begin(); itr != coom_rhs1.end(); ++itr)
00078       (*vs_lhs)(itr->col_j()+j_o) += alpha * itr->value() * vs_rhs2(itr->row_i()+i_o);
00079 }
00080 
00081 namespace UtilityPack {
00082 
00083 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM)
00084 template<class T_COOM>
00085 void imp_Mp_StMtCOOM(DMatrixSlice& gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha
00086   , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1
00087   , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2 );
00088 
00089 } // end namespace UtilityPack
00090 
00091 
00092 // gms_lhs += alpha * op(coom_rhs1) * op(gms_rhs2) (right) (BLAS xGEMM)
00093 template<class T_COOM>
00094 void Mp_StCOOMtM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
00095   , BLAS_Cpp::Transp trans_rhs1, const DMatrixSlice& gms_rhs2, BLAS_Cpp::Transp trans_rhs2)
00096 {
00097   Mp_MtM_assert_sizes(  gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
00098             , coom_rhs1.rows(), coom_rhs1.cols(), trans_rhs1
00099             , gms_rhs2.rows() ,gms_rhs2.cols(), trans_rhs2      );
00100   UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::trans, alpha, gms_rhs2
00101     , trans_not(trans_rhs2), coom_rhs1, trans_not(trans_rhs1)       );
00102 }
00103 
00104 // gms_lhs += alpha * op(gms_rhs1) * op(coom_rhs2) (left) (BLAS xGEMM)
00105 template<class T_COOM>
00106 void Mp_StMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSlice& gms_rhs1
00107   , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
00108 {
00109   Mp_MtM_assert_sizes(  gms_lhs->rows(), gms_lhs->cols(), BLAS_Cpp::no_trans
00110             , gms_rhs1.rows() ,gms_rhs1.cols(), trans_rhs1
00111             , coom_rhs2.rows(), coom_rhs2.cols(), trans_rhs2    );
00112   UtilityPack::imp_Mp_StMtCOOM( gms_lhs, BLAS_Cpp::no_trans, alpha, gms_rhs1
00113     , trans_rhs1, coom_rhs2, trans_rhs2                   );
00114 }
00115 
00116 // ToDo: implement the level-3 BLAS operations below
00117 
00118 // gms_lhs = alpha * op(coom_rhs1) * op(sym_rhs2) (right) (BLAS xSYMM)
00119 //template<class T_COOM>
00120 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
00121 //  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceSym& sym_rhs2, BLAS_Cpp::Transp trans_rhs2);
00122 
00123 // gms_lhs = alpha * op(sym_rhs1) * op(coom_rhs2) (left) (BLAS xSYMM)
00124 //template<class T_COOM>
00125 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceSym& sym_rhs1
00126 //  , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2);
00127 
00128 // gms_lhs = alpha * op(coom_rhs1) * op(tri_rhs2) (right) (BLAS xTRMM)
00129 //template<class T_COOM>
00130 //void Mp_StCOOMtSM(DMatrixSlice* gms_lhs, value_type alpha, const T_COOM& coom_rhs1
00131 //  , BLAS_Cpp::Transp trans_rhs1, const DMatrixSliceTri& tri_rhs2, BLAS_Cpp::Transp trans_rhs2);
00132 
00133 // gms_lhs = alpha * op(tri_rhs1) * op(coom_rhs2) (left) (BLAS xTRMM)
00134 //template<class T_COOM>
00135 //void Mp_StSMtCOOM(DMatrixSlice* gms_lhs, value_type alpha, const DMatrixSliceTri& tri_rhs1
00136 //  , BLAS_Cpp::Transp trans_rhs1, const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2);
00137 
00138 namespace UtilityPack {
00139 
00140 // op(gms_lhs) += alpha * op(gms_rhs1) * op(coom_rhs2) (BLAS xGEMM)
00141 //
00142 // This function perform this operation by looping through the nonzero elements of
00143 // coom_rhs2 and for each nonzero element (val,i,j) it performs the following operation.
00144 //
00145 //  op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i)
00146 //
00147 //
00148 //      jth               ith           jth
00149 //    [ #     ]   [     #   ]     [       ]
00150 //    [ #     ]   [     #   ]     [       ]
00151 //  m [ #     ] +=  [     #   ] *   [       ] n
00152 //    [ #     ]   [     #   ]   ith [ val     ]
00153 //        n           p           [       ]
00154 //                                    p
00155 //      op(gms_lhs)       op(gms_rhs1)           op(coom_rhs2)
00156 //
00157 // The number of arithmetic operations performed are: 
00158 //  floats = (2*m + 1) * nz
00159 //
00160 // Strictly speeking the number of memory references is:
00161 //  mem_refs = (2*m + 1) * nz
00162 // but this does not take into account that elements are accessed by columns
00163 // and this has some ramifications on cache effects and paging.  If op(gms_lhs)
00164 // == gms_lhs' or op(gms_rhs1) == gms_rhs1' then elements in a column are
00165 // not adjacent to each other and if m is large enough each element may
00166 // even reside on a seperate page of memory.  On Win32 0x86 systems a page is
00167 // 4 K so 4,000 (bytes/page) / 8 (bytes/double) = 500 doubles / page.  If
00168 // the working set of pages is small this could cause some serious page thrashing
00169 // for large m.
00170 //
00171 // Another concideration is to sorting order of the elements in the COO matrix.
00172 // If op(coom_rhs2) is sorted by row then columns of op(gms_lhs) will be accessed
00173 // consecutivly and will result in better performance.  The same goes for op(gms_rhs1)
00174 // if op(coom_rhs2) is sorted by column.
00175 //
00176 // There is opertunity for some vectorization and it is handled by calling
00177 // DenseLinAlgPack::Vp_StV(...).
00178 //
00179 template<class T_COOM>
00180 void imp_Mp_StMtCOOM(DMatrixSlice* gms_lhs, BLAS_Cpp::Transp trans_lhs, value_type alpha
00181   , const DMatrixSlice& gms_rhs1, BLAS_Cpp::Transp trans_rhs1
00182   , const T_COOM& coom_rhs2, BLAS_Cpp::Transp trans_rhs2)
00183 {
00184   using BLAS_Cpp::rows;
00185   using BLAS_Cpp::cols;
00186   using DenseLinAlgPack::col;
00187 
00188   typename T_COOM::difference_type
00189     i_o = coom_rhs2.row_offset(),
00190     j_o = coom_rhs2.col_offset();
00191   for(typename T_COOM::const_iterator itr = coom_rhs2.begin(); itr != coom_rhs2.end(); ++itr) {
00192     size_type i = rows( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 ),
00193           j = cols( itr->row_i() + i_o , itr->col_j() + j_o , trans_rhs2 );
00194     //  op(gms_lhs).col(j) += (alpha * val) * op(gms_rhs1).col(i)
00195     DenseLinAlgPack::Vp_StV(  &col(*gms_lhs,trans_lhs,j), alpha * itr->value()
00196       , col(gms_rhs1,trans_rhs1,i) );
00197   }
00198 }
00199 
00200 } // end namespace UtilityPack
00201 
00202 } // end namespace AbstractLinAlgPack
00203 
00204 #endif  // COO_MATRIX_TMPL_OP_DEF_H

Generated on Tue Oct 20 12:51:42 2009 for MOOCHO (Single Doxygen Collection) by doxygen 1.4.7