RTOpPack_TOpSetSubVector.hpp

00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //      Thyra: Interfaces and Support Code for the Interoperability of Abstract Numerical Algorithms 
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef RTOPPACK_TOP_SET_SUB_VECTOR_HPP
00030 #define RTOPPACK_TOP_SET_SUB_VECTOR_HPP
00031 
00032 #include "RTOpPack_RTOpTHelpers.hpp"
00033 
00034 namespace RTOpPack {
00035 
00041 template<class Scalar>
00042 class TOpSetSubVector : public RTOpT<Scalar> {
00043 public:
00044 
00046   typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
00047 
00049   TOpSetSubVector( const SparseSubVectorT<Scalar> &sub_vec );
00050 
00052   void set_sub_vec( const SparseSubVectorT<Scalar> &sub_vec );
00053 
00056 
00058   void get_op_type_num_entries(
00059     int*  num_values
00060     ,int* num_indexes
00061     ,int* num_chars
00062     ) const;
00064   void extract_op_state(
00065     int                             num_values
00066     ,primitive_value_type           value_data[]
00067     ,int                            num_indexes
00068     ,index_type                     index_data[]
00069     ,int                            num_chars
00070     ,char_type                      char_data[]
00071     ) const;
00073   void load_op_state(
00074     int                           num_values
00075     ,const primitive_value_type   value_data[]
00076     ,int                          num_indexes
00077     ,const index_type             index_data[]
00078     ,int                          num_chars
00079     ,const char_type              char_data[]
00080     );
00082   bool coord_invariant() const;
00084   void apply_op(
00085     const int   num_vecs,       const SubVectorT<Scalar>         sub_vecs[]
00086     ,const int  num_targ_vecs,  const MutableSubVectorT<Scalar>  targ_sub_vecs[]
00087     ,ReductTarget *reduct_obj
00088     ) const;
00089 
00091 
00092 private:
00093 
00094   // ////////////////////////////
00095   // Private types
00096 
00097   enum { num_sub_vec_members = 6 };
00098 
00099   // ////////////////////////////
00100   // Private data members
00101 
00102   SparseSubVectorT<Scalar>  sub_vec_;  // We do not own memory it its arrays!
00103   bool                      ownsMem_;
00104 
00105 }; // class TOpSetSubVector
00106 
00107 // ////////////////////////////////
00108 // Template definitions
00109 
00110 template<class Scalar>
00111 TOpSetSubVector<Scalar>::TOpSetSubVector( const SparseSubVectorT<Scalar> &sub_vec )
00112   :RTOpT<Scalar>("TOpSetSubVector"), sub_vec_(sub_vec),ownsMem_(false)
00113 {}
00114 
00115 template<class Scalar>
00116 void TOpSetSubVector<Scalar>::set_sub_vec( const SparseSubVectorT<Scalar> &sub_vec )
00117 {
00118   if( ownsMem_ ) {
00119     if(sub_vec_.values())  delete [] const_cast<Scalar*>(sub_vec_.values());
00120     if(sub_vec_.indices()) delete [] const_cast<index_type*>(sub_vec_.indices());
00121   }
00122   sub_vec_ = sub_vec;
00123   ownsMem_ = false;
00124 }
00125 
00126 // Overridden from RTOpT
00127 
00128 template<class Scalar>
00129 void TOpSetSubVector<Scalar>::get_op_type_num_entries(
00130   int*  num_values
00131   ,int* num_indexes
00132   ,int* num_chars
00133   ) const
00134 {
00135   typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
00136   TEST_FOR_EXCEPT( !num_values || !num_indexes || !num_chars ); 
00137   const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
00138   *num_values = num_prim_objs_per_scalar*sub_vec_.subNz();  // values[]
00139   *num_indexes =
00140     num_sub_vec_members // globalOffset,subDim,subNz,localOffset,isSorted,ownsMem
00141     + (sub_vec_.indices() ? sub_vec_.subNz() : 0 ); // indices[]
00142   *num_chars   = 0;
00143 }
00144 
00145 template<class Scalar>
00146 void TOpSetSubVector<Scalar>::extract_op_state(
00147   int                             num_values
00148   ,primitive_value_type           value_data[]
00149   ,int                            num_indexes
00150   ,index_type                     index_data[]
00151   ,int                            num_chars
00152   ,char_type                      char_data[]
00153   ) const
00154 {
00155   typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
00156   register index_type k, j;
00157 #ifdef RTOp_DEBUG
00158   TEST_FOR_EXCEPT(!(num_values==sub_vec_.subNz()));
00159   TEST_FOR_EXCEPT(!(num_indexes==num_sub_vec_members+(sub_vec_.indices()?sub_vec_.subNz():0)));
00160   TEST_FOR_EXCEPT(!num_chars==0);
00161 #endif
00162   const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
00163   index_type vd_off = 0;
00164   for( k = 0; k < sub_vec_.subNz(); ++k, vd_off += num_prim_objs_per_scalar ) {
00165     PTT::extractPrimitiveObjs(
00166       *(sub_vec_.values() + k*sub_vec_.valuesStride())
00167       ,num_prim_objs_per_scalar, value_data+vd_off
00168       );
00169   }
00170   index_data[k=0] = sub_vec_.globalOffset();
00171   index_data[++k] = sub_vec_.subDim();
00172   index_data[++k] = sub_vec_.subNz();
00173   index_data[++k] = sub_vec_.localOffset();
00174   index_data[++k] = sub_vec_.isSorted();
00175   index_data[++k] = ownsMem_ ? 1 : 0;
00176   if( sub_vec_.indices() ) {
00177     for( j = 0; j < sub_vec_.subNz(); ++j )
00178       index_data[++k] = *(sub_vec_.indices() + j*sub_vec_.indicesStride());
00179   }
00180 }
00181 
00182 template<class Scalar>
00183 void TOpSetSubVector<Scalar>::load_op_state(
00184   int                           num_values
00185   ,const primitive_value_type   value_data[]
00186   ,int                          num_indexes
00187   ,const index_type             index_data[]
00188   ,int                          num_chars
00189   ,const char_type              char_data[]
00190   )
00191 {
00192   typedef Teuchos::PrimitiveTypeTraits<Scalar> PTT;
00193 #ifdef _DEBUG
00194   TEST_FOR_EXCEPT( num_chars!=0 );
00195   TEST_FOR_EXCEPT( !value_data );
00196   TEST_FOR_EXCEPT( !index_data );
00197 #endif
00198   const int num_prim_objs_per_scalar = PTT::numPrimitiveObjs();
00199   index_type k, j;
00200   index_type Nz            = num_values / num_prim_objs_per_scalar;
00201   Scalar     *scalars      = const_cast<Scalar*>(sub_vec_.values());
00202   ptrdiff_t  scalarsStride = sub_vec_.valuesStride();
00203   index_type *indices      = const_cast<index_type*>(sub_vec_.indices());
00204   ptrdiff_t  indicesStride = sub_vec_.indicesStride();
00205   // Reallocate storage if we have to
00206   if( Nz != sub_vec_.subNz() || (indices==NULL && num_indexes > num_sub_vec_members ) ) {
00207     // The current sub_vec_ does not have storage setup to hold the incoming subvector.
00208     // Delete current storage if owned.
00209     if( ownsMem_ ) {
00210       if(scalars) delete [] scalars;
00211       if(indices) delete [] indices;
00212     }
00213     // We need to reallocate storage for scalars[] and perhaps indices[]
00214     scalars = new Scalar[Nz];
00215     if( num_indexes > num_sub_vec_members )
00216       indices = new index_type[Nz];
00217     ownsMem_ = true;
00218     scalarsStride = 1;
00219     indicesStride = 1;
00220   }
00221   else {
00222     // The storage in sub_vec_ is already correct to hold the incoming subvector
00223   }
00224   // Set the internal sub_vec
00225   index_type v_off = 0;
00226   for( k = 0; k < Nz; ++k, v_off += num_prim_objs_per_scalar )
00227     PTT::loadPrimitiveObjs( num_prim_objs_per_scalar, value_data+v_off, scalars+k*scalarsStride );
00228   const index_type globalOffset  = index_data[k=0];
00229   const index_type subDim        = index_data[++k];
00230   const index_type subNz         = index_data[++k];
00231   const ptrdiff_t  localOffset   = index_data[++k];
00232   const int        isSorted      = index_data[++k];
00233   ownsMem_                       = ( index_data[++k]==1 ? true : false );
00234   if( num_indexes > num_sub_vec_members ) {
00235     for( j = 0; j < num_values; ++j )
00236       *(indices+j*indicesStride) = index_data[++k];
00237   }
00238   TEST_FOR_EXCEPT( subNz != Nz );
00239   sub_vec_.initialize(globalOffset,subDim,subNz,scalars,scalarsStride,indices,indicesStride,localOffset,isSorted);
00240 }
00241 
00242 template<class Scalar>
00243 bool TOpSetSubVector<Scalar>::coord_invariant() const
00244 {
00245   return false;
00246 }
00247 
00248 template<class Scalar>
00249 void TOpSetSubVector<Scalar>::apply_op(
00250   const int   num_vecs,       const SubVectorT<Scalar>         sub_vecs[]
00251   ,const int  num_targ_vecs,  const MutableSubVectorT<Scalar>  targ_sub_vecs[]
00252   ,ReductTarget *reduct_obj
00253   ) const
00254 {
00255   // Get the local vector chunk data
00256   RTOP_APPLY_OP_0_1(num_vecs,sub_vecs,num_targ_vecs,targ_sub_vecs);
00257   const ptrdiff_t  z_global_offset = targ_sub_vecs[0].globalOffset();
00258   const index_type z_sub_dim       = subDim; // From macro above
00259   Scalar           *z_val          = z0_val; // From macro above
00260   const ptrdiff_t  z_val_s         = z0_s;   // From macro above
00261   // Get the sparse subvector data
00262   const index_type v_global_offset  = sub_vec_.globalOffset();
00263   const index_type v_sub_dim        = sub_vec_.subDim();
00264   const index_type v_sub_nz         = sub_vec_.subNz();
00265   const Scalar     *v_val           = sub_vec_.values();
00266   const ptrdiff_t  v_val_s          = sub_vec_.valuesStride();
00267   const index_type *v_ind           = sub_vec_.indices();
00268   const ptrdiff_t  v_ind_s          = sub_vec_.indicesStride();
00269   const ptrdiff_t  v_l_off          = sub_vec_.localOffset();
00270   //const bool       v_sorted         = sub_vec_.isSorted();
00271   
00272   //
00273   // Set part of the sub-vector v for this chunk z.
00274   //
00275 
00276   if( v_global_offset + v_sub_dim < z_global_offset + 1
00277     || z_global_offset + z_sub_dim < v_global_offset + 1 )
00278     return; // The sub-vector that we are setting does not overlap with this vector chunk!
00279 
00280   if( v_sub_nz == 0 )
00281     return; // The sparse sub-vector we are reading from is empty?
00282 
00283   // Get the number of elements that overlap
00284   index_type num_overlap;
00285   if( v_global_offset <= z_global_offset ) {
00286     if( v_global_offset + v_sub_dim >= z_global_offset + z_sub_dim )
00287       num_overlap = z_sub_dim;
00288     else
00289       num_overlap = (v_global_offset + v_sub_dim) - z_global_offset;
00290   }
00291   else {
00292     if( z_global_offset + z_sub_dim >= v_global_offset + v_sub_dim )
00293       num_overlap = v_sub_dim;
00294     else
00295       num_overlap = (z_global_offset + z_sub_dim) - v_global_offset;
00296   }
00297   
00298   // Set the part of the sub-vector that overlaps
00299   if( v_ind != NULL ) {
00300     // Sparse elements
00301     // Set the overlapping elements to zero first.
00302     if( v_global_offset >= z_global_offset )
00303       z_val += (v_global_offset - z_global_offset) * z_val_s;
00304     for( index_type k = 0; k < num_overlap; ++k, z_val += z_val_s )
00305       *z_val = 0.0;
00306     // Now set the sparse entries
00307     z_val = targ_sub_vecs[0].values();
00308     for( index_type k = 0; k < v_sub_nz; ++k, v_val += v_val_s, v_ind += v_ind_s ) {
00309       const index_type i = v_global_offset + v_l_off + (*v_ind);
00310       if( z_global_offset < i && i <= z_global_offset + z_sub_dim )
00311         z_val[ z_val_s * (i - z_global_offset - 1) ] = *v_val;
00312     }
00313     // ToDo: Implement a faster version for v sorted and eliminate the
00314     // if statement in the above loop.
00315   }
00316   else {
00317     // Dense elements
00318     if( v_global_offset <= z_global_offset )
00319       v_val += (z_global_offset - v_global_offset) * v_val_s;
00320     else
00321       z_val += (v_global_offset - z_global_offset) * z_val_s;
00322     for( index_type k = 0; k < num_overlap; ++k, v_val += v_val_s, z_val += z_val_s )
00323       *z_val = *v_val;
00324   }
00325 }
00326 
00327 } // namespace RTOpPack
00328 
00329 #endif // RTOPPACK_TOP_SET_SUB_VECTOR_HPP

Generated on Thu Sep 18 12:39:44 2008 for RTOp : Vector Reduction/Transformation Operators by doxygen 1.3.9.1