RTOpPack_TOpSetSubVector.hpp

// @HEADER
// ***********************************************************************
//
// RTOp: Interfaces and Support Software for Vector Reduction Transformation
//       Operations
//                Copyright (2006) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA
// Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
//
// ***********************************************************************
// @HEADER

#ifndef RTOPPACK_TOP_SET_SUB_VECTOR_HPP
#define RTOPPACK_TOP_SET_SUB_VECTOR_HPP

#include "RTOpPack_RTOpTHelpers.hpp"

namespace RTOpPack {

/** \brief Advanced transformation operator that sets the elements of a
 * dense or sparse sub-vector in a target vector.
 */
template<class Scalar>
class TOpSetSubVector : public RTOpT<Scalar> {
public:

  /** \brief . */
  typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;

  /** \brief Construct with the sub-vector to set (this operator does not
   * take ownership of the sub-vector's memory). */
  TOpSetSubVector( const SparseSubVectorT<Scalar> &sub_vec );

  /** \brief Reset the sub-vector to set, freeing any internally allocated
   * storage first (this operator does not take ownership of the client's
   * memory). */
  void set_sub_vec( const SparseSubVectorT<Scalar> &sub_vec );

  // Overridden from RTOpT

  void get_op_type_num_entries(
    int*  num_values
    ,int* num_indexes
    ,int* num_chars
    ) const;
  void extract_op_state(
    int                             num_values
    ,primitive_value_type           value_data[]
    ,int                            num_indexes
    ,index_type                     index_data[]
    ,int                            num_chars
    ,char_type                      char_data[]
    ) const;
  void load_op_state(
    int                           num_values
    ,const primitive_value_type   value_data[]
    ,int                          num_indexes
    ,const index_type             index_data[]
    ,int                          num_chars
    ,const char_type              char_data[]
    );
  bool coord_invariant() const;
  void apply_op(
    const int   num_vecs,       const ConstSubVectorView<Scalar>         sub_vecs[]
    ,const int  num_targ_vecs,  const SubVectorView<Scalar>  targ_sub_vecs[]
    ,ReductTarget *reduct_obj
    ) const;

private:

  // ////////////////////////////
  // Private types

  enum { num_sub_vec_members = 6 };

  // ////////////////////////////
  // Private data members

  SparseSubVectorT<Scalar>  sub_vec_;  // We do not own the memory in its arrays!
  bool                      ownsMem_;

}; // class TOpSetSubVector
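
// A minimal usage sketch (illustrative only; not part of the original
// header).  It assumes SparseSubVectorT<> has a default constructor plus
// the raw-pointer initialize() overload used in load_op_state() below;
// `v` is a hypothetical client array:
//
//   double v[] = { 1.0, 2.0, 3.0 };                // dense data to inject
//   SparseSubVectorT<double> sv;
//   sv.initialize( 0, 3, 3, v, 1, NULL, 1, 0, 1 ); // globalOffset=0, subDim=subNz=3
//   TOpSetSubVector<double> set_op(sv);
//   // The client vector implementation then applies set_op over its local
//   // chunks (no input vectors, one target sub-vector, NULL reduction
//   // object), which ends up in apply_op(...) defined below.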

// ////////////////////////////////
// Template definitions

template<class Scalar>
TOpSetSubVector<Scalar>::TOpSetSubVector( const SparseSubVectorT<Scalar> &sub_vec )
  :RTOpT<Scalar>("TOpSetSubVector"), sub_vec_(sub_vec),ownsMem_(false)
{}

template<class Scalar>
void TOpSetSubVector<Scalar>::set_sub_vec( const SparseSubVectorT<Scalar> &sub_vec )
{
  if( ownsMem_ ) {
    if(sub_vec_.values())  delete [] const_cast<Scalar*>(sub_vec_.values());
    if(sub_vec_.indices()) delete [] const_cast<index_type*>(sub_vec_.indices());
  }
  sub_vec_ = sub_vec;
  ownsMem_ = false;
}

// Overridden from RTOpT

template<class Scalar>
void TOpSetSubVector<Scalar>::get_op_type_num_entries(
  int*  num_values
  ,int* num_indexes
  ,int* num_chars
  ) const
{
  typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
  TEST_FOR_EXCEPT( !num_values || !num_indexes || !num_chars );
  const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
  *num_values = num_prim_objs_per_scalar*sub_vec_.subNz();  // values[]
  *num_indexes =
    num_sub_vec_members // globalOffset,subDim,subNz,localOffset,isSorted,ownsMem
    + (sub_vec_.indices() ? sub_vec_.subNz() : 0 ); // indices[]
  *num_chars   = 0;
}

template<class Scalar>
void TOpSetSubVector<Scalar>::extract_op_state(
  int                             num_values
  ,primitive_value_type           value_data[]
  ,int                            num_indexes
  ,index_type                     index_data[]
  ,int                            num_chars
  ,char_type                      char_data[]
  ) const
{
  typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
  index_type k, j;
#ifdef RTOp_DEBUG
  TEST_FOR_EXCEPT( !(num_values==PTT::numPrimitiveObjs()*sub_vec_.subNz()) );
  TEST_FOR_EXCEPT( !(num_indexes==num_sub_vec_members+(sub_vec_.indices()?sub_vec_.subNz():0)) );
  TEST_FOR_EXCEPT( !(num_chars==0) );
#endif
  const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
  index_type vd_off = 0;
  for( k = 0; k < sub_vec_.subNz(); ++k, vd_off += num_prim_objs_per_scalar ) {
    PTT::extractPrimitiveObjs(
      *(sub_vec_.values() + k*sub_vec_.valuesStride())
      ,num_prim_objs_per_scalar, value_data+vd_off
      );
  }
  index_data[k=0] = sub_vec_.globalOffset();
  index_data[++k] = sub_vec_.subDim();
  index_data[++k] = sub_vec_.subNz();
  index_data[++k] = sub_vec_.localOffset();
  index_data[++k] = sub_vec_.isSorted();
  index_data[++k] = ownsMem_ ? 1 : 0;
  if( sub_vec_.indices() ) {
    for( j = 0; j < sub_vec_.subNz(); ++j )
      index_data[++k] = *(sub_vec_.indices() + j*sub_vec_.indicesStride());
  }
}
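
// For reference (derived from the code above, not an official spec), the
// externalized operator state written by extract_op_state() and read back
// by load_op_state() is laid out as:
//
//   value_data[] : subNz() scalars, each expanded into
//                  PTT::numPrimitiveObjs() primitive values
//   index_data[] : [ globalOffset, subDim, subNz, localOffset, isSorted,
//                    ownsMem, indices[0..subNz-1] (only if sparse) ]
//   char_data[]  : unused (num_chars==0)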

template<class Scalar>
void TOpSetSubVector<Scalar>::load_op_state(
  int                           num_values
  ,const primitive_value_type   value_data[]
  ,int                          num_indexes
  ,const index_type             index_data[]
  ,int                          num_chars
  ,const char_type              char_data[]
  )
{
  typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
#ifdef TEUCHOS_DEBUG
  TEST_FOR_EXCEPT( num_chars!=0 );
  TEST_FOR_EXCEPT( !value_data );
  TEST_FOR_EXCEPT( !index_data );
#endif
  const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
  index_type k, j;
  index_type Nz            = num_values / num_prim_objs_per_scalar;
  Scalar     *scalars      = const_cast<Scalar*>(sub_vec_.values());
  ptrdiff_t  scalarsStride = sub_vec_.valuesStride();
  index_type *indices      = const_cast<index_type*>(sub_vec_.indices());
  ptrdiff_t  indicesStride = sub_vec_.indicesStride();
  // Reallocate storage if we have to
  if( Nz != sub_vec_.subNz() || (indices==NULL && num_indexes > num_sub_vec_members ) ) {
    // The current sub_vec_ does not have storage setup to hold the incoming subvector.
    // Delete current storage if owned.
    if( ownsMem_ ) {
      if(scalars) delete [] scalars;
      if(indices) delete [] indices;
    }
    // We need to reallocate storage for scalars[] and perhaps indices[]
    scalars = new Scalar[Nz];
    if( num_indexes > num_sub_vec_members )
      indices = new index_type[Nz];
    ownsMem_ = true;
    scalarsStride = 1;
    indicesStride = 1;
  }
  else {
    // The storage in sub_vec_ is already correct to hold the incoming subvector
  }
  // Set the internal sub_vec
  index_type v_off = 0;
  for( k = 0; k < Nz; ++k, v_off += num_prim_objs_per_scalar )
    PTT::loadPrimitiveObjs( num_prim_objs_per_scalar, value_data+v_off, scalars+k*scalarsStride );
  const index_type globalOffset  = index_data[k=0];
  const index_type subDim        = index_data[++k];
  const index_type subNz         = index_data[++k];
  const ptrdiff_t  localOffset   = index_data[++k];
  const int        isSorted      = index_data[++k];
  ownsMem_                       = ( index_data[++k]==1 ? true : false );
  if( num_indexes > num_sub_vec_members ) {
    for( j = 0; j < Nz; ++j ) // one transmitted index per stored entry
      *(indices+j*indicesStride) = index_data[++k];
  }
  TEST_FOR_EXCEPT( subNz != Nz );
  sub_vec_.initialize(globalOffset,subDim,subNz,scalars,scalarsStride,indices,indicesStride,localOffset,isSorted);
}

template<class Scalar>
bool TOpSetSubVector<Scalar>::coord_invariant() const
{
  return false;
}

template<class Scalar>
void TOpSetSubVector<Scalar>::apply_op(
  const int   num_vecs,       const ConstSubVectorView<Scalar>         sub_vecs[]
  ,const int  num_targ_vecs,  const SubVectorView<Scalar>  targ_sub_vecs[]
  ,ReductTarget *reduct_obj
  ) const
{
  // Get the local vector chunk data
  RTOP_APPLY_OP_0_1(num_vecs,sub_vecs,num_targ_vecs,targ_sub_vecs);
  const ptrdiff_t  z_global_offset = targ_sub_vecs[0].globalOffset();
  const index_type z_sub_dim       = subDim; // From macro above
  Scalar           *z_val          = z0_val; // From macro above
  const ptrdiff_t  z_val_s         = z0_s;   // From macro above
  // Get the sparse subvector data
  const index_type v_global_offset  = sub_vec_.globalOffset();
  const index_type v_sub_dim        = sub_vec_.subDim();
  const index_type v_sub_nz         = sub_vec_.subNz();
  const Scalar     *v_val           = sub_vec_.values();
  const ptrdiff_t  v_val_s          = sub_vec_.valuesStride();
  const index_type *v_ind           = sub_vec_.indices();
  const ptrdiff_t  v_ind_s          = sub_vec_.indicesStride();
  const ptrdiff_t  v_l_off          = sub_vec_.localOffset();
  //const bool       v_sorted         = sub_vec_.isSorted();

  //
  // Set part of the sub-vector v for this chunk z.
  //

  if( v_global_offset + v_sub_dim < z_global_offset + 1
    || z_global_offset + z_sub_dim < v_global_offset + 1 )
    return; // The sub-vector that we are setting does not overlap with this vector chunk!

  if( v_sub_nz == 0 )
    return; // The sparse sub-vector we are reading from is empty?

  // Get the number of elements that overlap
  index_type num_overlap;
  if( v_global_offset <= z_global_offset ) {
    if( v_global_offset + v_sub_dim >= z_global_offset + z_sub_dim )
      num_overlap = z_sub_dim;
    else
      num_overlap = (v_global_offset + v_sub_dim) - z_global_offset;
  }
  else {
    if( z_global_offset + z_sub_dim >= v_global_offset + v_sub_dim )
      num_overlap = v_sub_dim;
    else
      num_overlap = (z_global_offset + z_sub_dim) - v_global_offset;
  }
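
  // Worked example (illustrative, made-up offsets): if the stored
  // sub-vector v covers global elements [10,20) (v_global_offset=10,
  // v_sub_dim=10) and this chunk z covers [15,40) (z_global_offset=15,
  // z_sub_dim=25), the first branch applies and
  // num_overlap = (10+10) - 15 = 5 elements overlap.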

  // Set the part of the sub-vector that overlaps
  if( v_ind != NULL ) {
    // Sparse elements
    // Set the overlapping elements to zero first.
    if( v_global_offset >= z_global_offset )
      z_val += (v_global_offset - z_global_offset) * z_val_s;
    for( index_type k = 0; k < num_overlap; ++k, z_val += z_val_s )
      *z_val = 0.0;
    // Now set the sparse entries
    z_val = targ_sub_vecs[0].values();
    for( index_type k = 0; k < v_sub_nz; ++k, v_val += v_val_s, v_ind += v_ind_s ) {
      const index_type i = v_global_offset + v_l_off + (*v_ind);
      if( z_global_offset < i && i <= z_global_offset + z_sub_dim )
        z_val[ z_val_s * (i - z_global_offset - 1) ] = *v_val;
    }
    // ToDo: Implement a faster version for v sorted and eliminate the
    // if statement in the above loop.
  }
  else {
    // Dense elements
    if( v_global_offset <= z_global_offset )
      v_val += (z_global_offset - v_global_offset) * v_val_s;
    else
      z_val += (v_global_offset - z_global_offset) * z_val_s;
    for( index_type k = 0; k < num_overlap; ++k, v_val += v_val_s, z_val += z_val_s )
      *z_val = *v_val;
  }
}
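
// Another illustrative example (made-up numbers) of the one-based sparse
// index convention used in apply_op() above: with v_global_offset=10,
// v_l_off=0 and a stored index *v_ind=3, the target global position is
// i = 10 + 0 + 3 = 13.  A chunk with z_global_offset=10 and z_sub_dim=5
// holds global positions 11..15, so 10 < 13 <= 15 passes the test and the
// value is written to local element (i - z_global_offset - 1) = 2 of the
// chunk.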

} // namespace RTOpPack

#endif // RTOPPACK_TOP_SET_SUB_VECTOR_HPP
