RTOpPack_MPI_apply_op.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 // RTOp: Interfaces and Support Software for Vector Reduction Transformation
00005 //       Operations
00006 //                Copyright (2006) Sandia Corporation
00007 // 
00008 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00009 // license for use of this work by or on behalf of the U.S. Government.
00010 // 
00011 // This library is free software; you can redistribute it and/or modify
00012 // it under the terms of the GNU Lesser General Public License as
00013 // published by the Free Software Foundation; either version 2.1 of the
00014 // License, or (at your option) any later version.
00015 //  
00016 // This library is distributed in the hope that it will be useful, but
00017 // WITHOUT ANY WARRANTY; without even the implied warranty of
00018 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00019 // Lesser General Public License for more details.
00020 //  
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License along with this library; if not, write to the Free Software
00023 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00024 // USA
00025 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov) 
00026 // 
00027 // ***********************************************************************
00028 // @HEADER
00029 
00030 #ifndef RTOPPACK_MPI_APPLY_OP_HPP
00031 #define RTOPPACK_MPI_APPLY_OP_HPP
00032 
00033 #include "RTOpPack_MPI_apply_op_decl.hpp"
00034 #include "Teuchos_RawMPITraits.hpp"
00035 #include "Teuchos_Workspace.hpp"
00036 #ifdef RTOp_USE_MPI
00037 //#  include "Teuchos_MPIReductionOpBase.hpp"
00038 #  include "Teuchos_MpiReductionOpSetter.hpp"
00039 #endif
00040 
00041 #ifdef RTOPPACK_MPI_APPLY_OP_DUMP
00042 #  include "Teuchos_VerboseObject.hpp"
00043 #endif // RTOPPACK_MPI_APPLY_OP_DUMP
00044 
00045 namespace RTOpPack {
00046 
00047 #ifdef RTOPPACK_MPI_APPLY_OP_DUMP
00048 template<class Scalar>
00049 void print( const ConstSubVectorView<Scalar> &v, Teuchos::FancyOStream &out_arg )
00050 {
00051   Teuchos::RefCountPtr<Teuchos::FancyOStream> out = Teuchos::rcp(&out_arg,false);
00052   Teuchos::OSTab tab(out);
00053   *out << "globalOffset="<<v.globalOffset()<<"\n";
00054   *out << "subDim="<<v.subDim()<<"\n";
00055   *out << "values:\n";
00056   tab.incrTab();
00057   for( int i = 0; i < v.subDim(); ++i )
00058     *out << " " << v(i) << ":" << (v.globalOffset()+i);
00059   *out << "\n";
00060 }
00061 
00062 #  include "Teuchos_VerboseObject.hpp"
00063 #endif // RTOPPACK_MPI_APPLY_OP_DUMP
00064 
00065 #ifdef RTOp_USE_MPI
00066 
00067 template<class Scalar>
00068 class MpiReductionOp : public Teuchos::MpiReductionOpBase {
00069 public:
00070 
00071   MpiReductionOp(
00072     const Teuchos::RefCountPtr<const RTOpT<Scalar> >  &op
00073     )
00074     : op_(op)
00075     {
00076       *op; // Assert not null
00077     }
00078 
00079   void reduce(
00080     void              *invec
00081     ,void             *inoutvec
00082     ,int              *len
00083     ,RTOp_Datatype    *datatype
00084     ) const
00085     {
00086       typedef typename RTOpT<Scalar>::primitive_value_type  primitive_value_type;
00087       int num_reduct_type_values = 0, num_reduct_type_indexes = 0, num_reduct_type_chars = 0;
00088       op_->get_reduct_type_num_entries(
00089         &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars
00090         );
00091       const int reduct_obj_ext_size
00092         = RTOpPack::reduct_obj_ext_size<primitive_value_type>(
00093           num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00094           );
00095       const char
00096         *in_reduct_obj_ext = reinterpret_cast<char*>(invec);
00097       char
00098         *inout_reduct_obj_ext = reinterpret_cast<char*>(inoutvec);
00099       Teuchos::RefCountPtr<ReductTarget>
00100         in_reduct_obj    = op_->reduct_obj_create(),
00101         inout_reduct_obj = op_->reduct_obj_create();
00102       for( int i = 0; i < (*len); ++i, in_reduct_obj_ext += reduct_obj_ext_size, inout_reduct_obj_ext += reduct_obj_ext_size  )
00103       {
00104         load_reduct_obj_ext_state( *op_, in_reduct_obj_ext,    &*in_reduct_obj    );
00105         load_reduct_obj_ext_state( *op_, inout_reduct_obj_ext, &*inout_reduct_obj );
00106         op_->reduce_reduct_objs( *in_reduct_obj, &*inout_reduct_obj );
00107         extract_reduct_obj_ext_state(
00108           *op_,*inout_reduct_obj,num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00109           ,inout_reduct_obj_ext
00110           );
00111       }
00112     }
00113 private:
00114   Teuchos::RefCountPtr<const RTOpT<Scalar> >  op_;
00115   // Not defined and not to be called!
00116   MpiReductionOp();
00117   MpiReductionOp(const MpiReductionOp<Scalar>&);
00118   MpiReductionOp<Scalar>& operator=(const MpiReductionOp<Scalar>&);
00119 };
00120 
00121 
00122 #endif // RTOp_USE_MPI
00123 
00124 } // namespace RTOpPack
00125 
00126 //
00127 // Template implementation stuff
00128 //
00129 
00130 namespace {
00131 
00132 // This class is designed to call MPI functions to free
00133 // opaque objects.  This allows robust behavior in the
00134 // face of exceptions.
00135 template<class T>
00136 class call_free_func {
00137 public:
00138 //  typedef extern "C" int (*free_func_ptr_t)(T*);
00139     typedef int (*free_func_ptr_t)(T*);
00140   call_free_func(T* opaque_obj, T null_value, free_func_ptr_t free_func_ptr)
00141     : opaque_obj_(opaque_obj), null_value_(null_value), free_func_ptr_(free_func_ptr)
00142     {}
00143   ~call_free_func()
00144     {
00145       if(*opaque_obj_ != null_value_)
00146         free_func_ptr_(opaque_obj_);
00147     }
00148 private:
00149   T*                opaque_obj_;
00150   T                 null_value_;
00151   free_func_ptr_t   free_func_ptr_;
00152   // not defined and not to be called
00153   call_free_func();
00154   call_free_func(const call_free_func&);
00155   call_free_func& operator=(const call_free_func&);
00156 }; 
00157 
00158 } // end namespace
00159 
00160 template<class primitive_value_type>
00161 void RTOpPack::MPI_type_signature(
00162   const int num_values
00163   ,const int num_indexes
00164   ,const int num_chars
00165   ,int* num_entries
00166   ,int block_lengths[]
00167   ,MPI_Aint displacements[]
00168   ,MPI_Datatype datatypes[]
00169   )
00170 {
00171   int k = 0, off = 0;
00172   // values
00173   block_lengths[k] = 3 + num_values; /* must carry size information */
00174   displacements[k] = 0;
00175   datatypes[k]     = Teuchos::RawMPITraits<primitive_value_type>::type();
00176   ++k;
00177   off += Teuchos::RawMPITraits<primitive_value_type>::adjustCount(3 + num_values) * sizeof(primitive_value_type);
00178   // indexes
00179   if( num_indexes ) {
00180     block_lengths[k] = num_indexes;
00181     displacements[k] = off;
00182     datatypes[k]     = Teuchos::RawMPITraits<index_type>::type();
00183     ++k;
00184     off += num_indexes * sizeof(index_type);
00185   }
00186   // chars
00187   if( num_chars ) {
00188     block_lengths[k] = num_chars;
00189     displacements[k] = off;
00190     datatypes[k]     = Teuchos::RawMPITraits<char_type>::type();
00191     ++k;
00192   }
00193   *num_entries = k;
00194 }
00195 
00196 template<class primitive_value_type>
00197 int reduct_obj_ext_size(
00198   int   num_values
00199   ,int  num_indexes
00200   ,int  num_chars
00201   )
00202 {
00203   TEST_FOR_EXCEPT(true);
00204   return 0;
00205 }
00206 
00207 template<class Scalar>
00208 void RTOpPack::extract_reduct_obj_ext_state(
00209   const RTOpT<Scalar>    &op
00210   ,const ReductTarget    &reduct_obj
00211   ,int                   num_values
00212   ,int                   num_indexes
00213   ,int                   num_chars
00214   ,void                  *_reduct_obj_ext
00215   )
00216 {
00217   typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
00218   char *reduct_obj_ext = reinterpret_cast<char*>(_reduct_obj_ext);
00219   const int
00220     num_values_off  = 0,
00221     num_indexes_off = num_values_off  + sizeof(primitive_value_type),
00222     num_chars_off   = num_indexes_off + sizeof(primitive_value_type),
00223     values_off      = num_chars_off   + sizeof(primitive_value_type),
00224     indexes_off     = values_off      + num_values  * sizeof(primitive_value_type),
00225     chars_off       = indexes_off     + num_indexes * sizeof(index_type);
00226   *reinterpret_cast<primitive_value_type*>(reduct_obj_ext + num_values_off)  = static_cast<primitive_value_type>(num_values);
00227   *reinterpret_cast<primitive_value_type*>(reduct_obj_ext + num_indexes_off) = static_cast<primitive_value_type>(num_indexes);
00228   *reinterpret_cast<primitive_value_type*>(reduct_obj_ext + num_chars_off)   = static_cast<primitive_value_type>(num_chars);
00229   op.extract_reduct_obj_state(
00230     reduct_obj
00231     ,num_values,  num_values  ? reinterpret_cast<primitive_value_type*>(reduct_obj_ext + values_off) : NULL
00232      ,num_indexes, num_indexes ? reinterpret_cast<index_type*>(reduct_obj_ext + indexes_off)          : NULL
00233     ,num_chars,   num_chars   ? reinterpret_cast<char_type*>(reduct_obj_ext  + chars_off)            : NULL
00234     );
00235 }
00236 
00237 template<class Scalar>
00238 void RTOpPack::load_reduct_obj_ext_state(
00239   const RTOpT<Scalar>    &op
00240   ,const void            *_reduct_obj_ext
00241   ,ReductTarget          *reduct_obj
00242   )
00243 {
00244   typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
00245   const char *reduct_obj_ext = reinterpret_cast<const char*>(_reduct_obj_ext);
00246   const int
00247     num_values_off  = 0,
00248     num_values      =                   static_cast<int>(*reinterpret_cast<const primitive_value_type*>(reduct_obj_ext + num_values_off)),
00249     num_indexes_off = num_values_off  + sizeof(primitive_value_type),
00250     num_indexes     =                   static_cast<int>(*reinterpret_cast<const primitive_value_type*>(reduct_obj_ext + num_indexes_off)),
00251     num_chars_off   = num_indexes_off + sizeof(primitive_value_type),
00252     num_chars       =                   static_cast<int>(*reinterpret_cast<const primitive_value_type*>(reduct_obj_ext + num_chars_off)),
00253     values_off      = num_chars_off + sizeof(primitive_value_type),
00254     indexes_off     = values_off  + num_values  * sizeof(primitive_value_type),
00255     chars_off       = indexes_off + num_indexes * sizeof(index_type);
00256   op.load_reduct_obj_state(
00257     num_values,   num_values  ? reinterpret_cast<const primitive_value_type*>(reduct_obj_ext + values_off) : NULL
00258      ,num_indexes, num_indexes ? reinterpret_cast<const index_type*>(reduct_obj_ext + indexes_off)          : NULL
00259     ,num_chars,   num_chars   ? reinterpret_cast<const char_type*>(reduct_obj_ext  + chars_off)            : NULL
00260     ,reduct_obj
00261     );
00262 }
00263 
00264 template<class Scalar>
00265 void RTOpPack::MPI_apply_op(
00266   MPI_Comm                                      comm
00267   ,const RTOpT<Scalar>                          &op
00268   ,const int                                    root_rank
00269   ,const int                                    num_vecs
00270   ,const RTOpPack::ConstSubVectorView<Scalar>   sub_vecs[]
00271   ,const int                                    num_targ_vecs
00272   ,const RTOpPack::SubVectorView<Scalar>        targ_sub_vecs[]
00273   ,ReductTarget                                 *reduct_obj
00274   )
00275 {
00276   ReductTarget* reduct_objs[] = { reduct_obj };
00277   MPI_apply_op(
00278     comm,op,root_rank,1,num_vecs,sub_vecs,num_targ_vecs,targ_sub_vecs
00279     ,reduct_obj ? reduct_objs : NULL
00280     );
00281 }
00282 
00284 template<class Scalar>
00285 void RTOpPack::MPI_apply_op(
00286   MPI_Comm                                           comm
00287   ,const RTOpT<Scalar>                               &op
00288   ,const int                                         root_rank
00289   ,const int                                         num_cols
00290   ,const int                                         num_multi_vecs
00291   ,const RTOpPack::ConstSubMultiVectorView<Scalar>   sub_multi_vecs[]
00292   ,const int                                         num_targ_multi_vecs
00293   ,const RTOpPack::SubMultiVectorView<Scalar>        targ_sub_multi_vecs[]
00294   ,RTOpPack::ReductTarget*const                      reduct_objs[]
00295   )
00296 {
00297   using Teuchos::Workspace;
00298   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00299   int k, j, off;
00300   Workspace<ConstSubVectorView<Scalar> > c_sub_vecs(wss,num_multi_vecs*num_cols);
00301   if(sub_multi_vecs) {
00302     for( off = 0, j = 0; j < num_cols; ++j ) {
00303       for( k = 0; k < num_multi_vecs; ++k ) {
00304         const ConstSubMultiVectorView<Scalar> &mv = sub_multi_vecs[k];
00305         c_sub_vecs[off++].initialize(mv.globalOffset(),mv.subDim(),&mv(0,j),1);
00306       }
00307     }
00308   }
00309   Workspace<SubVectorView<Scalar> > c_targ_sub_vecs(wss,num_targ_multi_vecs*num_cols);
00310   if(targ_sub_multi_vecs) {
00311     for( off = 0, j = 0; j < num_cols; ++j ) {
00312       for( k = 0; k < num_targ_multi_vecs; ++k ) {
00313         const SubMultiVectorView<Scalar> &mv = targ_sub_multi_vecs[k];
00314         c_targ_sub_vecs[off++].initialize(mv.globalOffset(),mv.subDim(),&mv(0,j),1);
00315       }
00316     }
00317   }
00318   MPI_apply_op(
00319     comm,op,root_rank,num_cols
00320     ,num_multi_vecs, num_multi_vecs && sub_multi_vecs ? &c_sub_vecs[0] : NULL
00321     ,num_targ_multi_vecs, num_targ_multi_vecs && targ_sub_multi_vecs ? &c_targ_sub_vecs[0] : NULL
00322     ,reduct_objs
00323     );
00324 }
00325 
00326 template<class Scalar>
00327 void RTOpPack::MPI_all_reduce(
00328   MPI_Comm                            comm
00329   ,const RTOpT<Scalar>                &op
00330   ,const int                          root_rank
00331   ,const int                          num_cols
00332   ,const ReductTarget*const           i_reduct_objs[]
00333   ,ReductTarget*const                 reduct_objs[]
00334   )
00335 {
00336   using Teuchos::Workspace;
00337   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00338   typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
00339   //
00340   // Get the description of the datatype of the target object.  We
00341   // need this in order to map it to an MPI user-defined data type so
00342   // that the reduction operations can be performed over all of the of
00343   // processors.
00344   //
00345   // Get the number of the entries in the array that describes target
00346   // type's datatypes
00347   int num_reduct_type_values = 0, num_reduct_type_indexes = 0,
00348     num_reduct_type_chars = 0,  num_reduct_type_entries = 0;
00349   op.get_reduct_type_num_entries(
00350     &num_reduct_type_values, &num_reduct_type_indexes, &num_reduct_type_chars
00351     );
00352   num_reduct_type_entries
00353     = (num_reduct_type_values ? 1 : 0)
00354     + (num_reduct_type_indexes ? 1 : 0)
00355     + (num_reduct_type_chars ? 1 : 0);
00356   //
00357   // Get the arrays that describe the reduction target type datatypes
00358   //
00359   if( num_reduct_type_values == 0 ) ++num_reduct_type_entries; // Add the block for the sizes
00360   int
00361     target_type_block_lengths[3];
00362   MPI_Aint
00363     target_type_displacements[3];
00364   RTOp_Datatype
00365     target_type_datatypes[3];
00366   MPI_type_signature<primitive_value_type>(
00367     num_reduct_type_values, num_reduct_type_indexes, num_reduct_type_chars
00368     ,&num_reduct_type_entries
00369     ,target_type_block_lengths, target_type_displacements, target_type_datatypes 
00370     );
00371   //
00372   // Translate this reduction target datatype description to an MPI datatype
00373   //
00374   MPI_Datatype
00375     mpi_reduct_ext_type = MPI_DATATYPE_NULL;
00376   TEST_FOR_EXCEPTION(
00377     0!=MPI_Type_struct(
00378       num_reduct_type_entries
00379       ,target_type_block_lengths, target_type_displacements
00380       ,target_type_datatypes, &mpi_reduct_ext_type
00381       )
00382     ,std::logic_error, "Error!"
00383     );
00384   call_free_func<MPI_Datatype>
00385     free_mpi_reduct_ext_type(&mpi_reduct_ext_type,MPI_DATATYPE_NULL,MPI_Type_free);
00386   TEST_FOR_EXCEPTION(
00387     0!=MPI_Type_commit( &mpi_reduct_ext_type )
00388     ,std::logic_error,"Error!"
00389     );
00390   //
00391   // Create external contiguous representations for the intermediate
00392   // reduction objects that MPI can use.  This is very low level but
00393   // at least the user does not have to do it.
00394   //
00395   const int reduct_obj_ext_size
00396     = RTOpPack::reduct_obj_ext_size<primitive_value_type>(
00397       num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00398       );
00399   Workspace<char> // This is raw memory for external intermediate reduction object.
00400     i_reduct_objs_ext( wss, reduct_obj_ext_size*num_cols, false );
00401   for( int kc = 0; kc < num_cols; ++kc ) {
00402     extract_reduct_obj_ext_state(
00403       op,*i_reduct_objs[kc],num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00404       ,&i_reduct_objs_ext[0]+kc*reduct_obj_ext_size
00405       );
00406   }
00407   //
00408   // Now perform the global reduction over all of the processors.
00409   //
00410   if( root_rank >= 0 ) {
00411     TEST_FOR_EXCEPTION( true, std::logic_error, "Error, we don't support root_rank >= 0 yet!" );
00412   }
00413   else {
00414     // Apply the reduction operation over all of the processors
00415     // and reduce to one target object on each processor
00416     //
00417     // Create output argument for MPI call (must be different from input)
00418     //
00419     Workspace<char> // This is raw memory for external intermediate reduction object.
00420       i_reduct_objs_tmp( wss, reduct_obj_ext_size*num_cols, false );
00421     Teuchos::RefCountPtr<ReductTarget> i_reduct_obj_tmp_uninit = op.reduct_obj_create(); // Just an unreduced object!
00422     for( int kc = 0; kc < num_cols; ++kc ) {
00423       extract_reduct_obj_ext_state( // Initialize to no initial reduction
00424         op,*i_reduct_obj_tmp_uninit,num_reduct_type_values,num_reduct_type_indexes,num_reduct_type_chars
00425         ,&i_reduct_objs_tmp[0]+kc*reduct_obj_ext_size
00426         );
00427     }
00428 #ifdef RTOp_USE_MPI
00429     //
00430     // Setup the mpi-compatible global function object for this
00431     // reduction operator and create the MPI_Op object.
00432     //
00433     Teuchos::MpiReductionOpSetter
00434       mpi_op_setter( Teuchos::rcp(new MpiReductionOp<Scalar>(Teuchos::rcp(&op,false))) ); // Destructor will release everything!
00435     //
00436     // Call MPI
00437     //
00438     TEST_FOR_EXCEPT(
00439       0!=MPI_Allreduce(
00440         &i_reduct_objs_ext[0], &i_reduct_objs_tmp[0]
00441         ,num_cols, mpi_reduct_ext_type
00442         ,mpi_op_setter.mpi_op()
00443         ,comm
00444         )
00445       );
00446 #else // RTOp_USE_MPI
00447     for( int k = 0; k < reduct_obj_ext_size; ++k )
00448       i_reduct_objs_tmp[k] = i_reduct_objs_ext[k];
00449 #endif // RTOp_USE_MPI
00450     // Load the updated state of the reduction target object and reduce.
00451     if(1) {
00452       Teuchos::RefCountPtr<ReductTarget> tmp_reduct_obj = op.reduct_obj_create();
00453       for( int kc = 0; kc < num_cols; ++kc ) {
00454         load_reduct_obj_ext_state( op, &i_reduct_objs_tmp[0]+kc*reduct_obj_ext_size, &*tmp_reduct_obj );
00455         op.reduce_reduct_objs( *tmp_reduct_obj, reduct_objs[kc] );
00456       }
00457     }
00458   }
00459 }
00460 
00461 template<class Scalar>
00462 void RTOpPack::MPI_apply_op(
00463   MPI_Comm                                  comm
00464   ,const RTOpT<Scalar>                      &op
00465   ,const int                                root_rank
00466   ,const int                                num_cols
00467   ,const int                                num_vecs
00468   ,const ConstSubVectorView<Scalar>         sub_vecs[]
00469   ,const int                                num_targ_vecs
00470   ,const SubVectorView<Scalar>              sub_targ_vecs[]
00471   ,ReductTarget*const                       reduct_objs[]
00472   )
00473 {
00474 #ifdef RTOPPACK_MPI_APPLY_OP_DUMP
00475   Teuchos::RefCountPtr<Teuchos::FancyOStream>
00476     out = Teuchos::VerboseObjectBase::getDefaultOStream();
00477   Teuchos::OSTab tab(out);
00478   if(show_mpi_apply_op_dump) {
00479     *out << "\nEntering RTOpPack::MPI_apply_op(...) ...\n";
00480     *out
00481       << "\ncomm = " << comm
00482       << "\nop = " << op.description()
00483       << "\nroot_rank = " << root_rank
00484       << "\nnum_cols = " << num_cols
00485       << "\nnum_vecs = " << num_vecs
00486       << "\nnum_targ_vecs = " << num_targ_vecs
00487       << "\n";
00488     if( num_vecs && sub_vecs ) {
00489       *out << "\nInput vectors:\n";
00490       Teuchos::OSTab tab(out);
00491       for( int kc = 0; kc < num_cols; ++kc ) {
00492         for( int k = 0; k < num_vecs; ++k ) {
00493           *out << "\nvecs["<<kc<<","<<k<<"] =\n";
00494           print(sub_vecs[kc*num_vecs+k],*out);
00495         }
00496       }
00497     }
00498     if( num_targ_vecs && sub_targ_vecs ) {
00499       *out << "\nInput/output vectors *before* transforamtion:\n";
00500       Teuchos::OSTab tab(out);
00501       for( int kc = 0; kc < num_cols; ++kc ) {
00502         for( int k = 0; k < num_targ_vecs; ++k ) {
00503           *out << "\nvecs["<<kc<<","<<k<<"] =\n";
00504           print(sub_targ_vecs[kc*num_targ_vecs+k],*out);
00505         }
00506       }
00507     }
00508     if(reduct_objs) {
00509       *out << "\nInput/output reduction objects *before* reduction:\n";
00510       Teuchos::OSTab tab(out);
00511       for( int kc = 0; kc < num_cols; ++kc ) {
00512         *out
00513           << "\nreduct_objs["<<kc<<"] =\n"
00514           << describe(*reduct_objs[kc],Teuchos::VERB_EXTREME);
00515       }
00516     }
00517   }
00518 #endif // RTOPPACK_MPI_APPLY_OP_DUMP
00519   using Teuchos::Workspace;
00520   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00521   typedef typename RTOpT<Scalar>::primitive_value_type primitive_value_type;
00522   if( reduct_objs == NULL && sub_vecs == NULL && sub_targ_vecs == NULL ) {
00523     // This is a transformation operation with no data on this processor.
00524     // Therefore, we can just exist!
00525   }
00526   else {
00527     const int localSubDim =
00528       ( num_vecs
00529         ? ( sub_vecs ? sub_vecs[0].subDim() : 0 )
00530         : ( sub_targ_vecs ? sub_targ_vecs[0].subDim() : 0 )
00531         );
00532     // See if we need to do any global communication at all?
00533     if( comm == MPI_COMM_NULL || reduct_objs == NULL ) {
00534       if( ( sub_vecs || sub_targ_vecs ) && localSubDim ) {
00535         for( int kc = 0; kc < num_cols; ++kc ) {
00536           op.apply_op(
00537             num_vecs,sub_vecs+kc*num_vecs,num_targ_vecs,sub_targ_vecs+kc*num_targ_vecs
00538             ,reduct_objs ? reduct_objs[kc] : NULL
00539             );
00540         }
00541       }
00542     }
00543     else {
00544       // Check the preconditions for excluding empty target vectors.
00545       TEST_FOR_EXCEPTION(
00546         ( ( num_vecs && !sub_vecs) || ( num_targ_vecs && !sub_targ_vecs) ) && !( !sub_vecs && !sub_targ_vecs )
00547         ,std::logic_error
00548         ,"MPI_apply_op(...): Error, invalid arguments num_vecs = " << num_vecs
00549         << ", sub_vecs = " << sub_vecs << ", num_targ_vecs = " << num_targ_vecs
00550         << ", sub_targ_vecs = " << sub_targ_vecs
00551         );
00552       //
00553       // There is a non-null reduction target object and we are using
00554       // MPI so we need to reduce it across processors
00555       //
00556       // Allocate the intermediate target object and perform the
00557       // reduction for the vector elements on this processor.
00558       //
00559       Workspace<Teuchos::RefCountPtr<ReductTarget> >
00560         i_reduct_objs( wss, num_cols );
00561       for( int kc = 0; kc < num_cols; ++kc ) {
00562         i_reduct_objs[kc] = op.reduct_obj_create();
00563         if( ( sub_vecs || sub_targ_vecs ) && localSubDim ) {
00564           op.apply_op(
00565             num_vecs, sub_vecs+kc*num_vecs, num_targ_vecs, sub_targ_vecs+kc*num_targ_vecs
00566             ,&*i_reduct_objs[kc]
00567             );
00568         }
00569       }
00570       //
00571       // Reduce the local intermediate reduction objects into the global reduction objects
00572       //
00573       Workspace<const ReductTarget*>
00574         _i_reduct_objs( wss, num_cols );
00575       for( int kc = 0; kc < num_cols; ++kc )
00576         _i_reduct_objs[kc] = &*i_reduct_objs[kc];
00577       MPI_all_reduce(comm,op,root_rank,num_cols,&_i_reduct_objs[0],reduct_objs);
00578     }
00579   }
00580 #ifdef RTOPPACK_MPI_APPLY_OP_DUMP
00581   if(show_mpi_apply_op_dump) {
00582     if( num_targ_vecs && sub_targ_vecs ) {
00583       *out << "\nInput/output vectors *after* transforamtion:\n";
00584       Teuchos::OSTab tab(out);
00585       for( int kc = 0; kc < num_cols; ++kc ) {
00586         for( int k = 0; k < num_targ_vecs; ++k ) {
00587           *out << "\nvecs["<<kc<<","<<k<<"] =\n";
00588           print(sub_targ_vecs[kc*num_targ_vecs+k],*out);
00589         }
00590       }
00591     }
00592     if(reduct_objs) {
00593       *out << "\nInput/output reduction objects *after* reduction:\n";
00594       Teuchos::OSTab tab(out);
00595       for( int kc = 0; kc < num_cols; ++kc ) {
00596         *out
00597           << "\nreduct_objs["<<kc<<"] =\n"
00598           << describe(*reduct_objs[kc],Teuchos::VERB_EXTREME);
00599       }
00600     }
00601     *out << "\nLeaving RTOpPack::MPI_apply_op(...) ...\n";
00602   }
00603 #endif // RTOPPACK_MPI_APPLY_OP_DUMP
00604 }
00605 
00606 #endif // RTOPPACK_MPI_APPLY_OP_HPP

Generated on Thu Sep 18 12:30:42 2008 for RTOp Package Browser (Single Doxygen Collection) by doxygen 1.3.9.1