Sierra Toolkit Version of the Day
ParallelReduce.hpp
00001 /*------------------------------------------------------------------------*/
00002 /*                 Copyright 2010 Sandia Corporation.                     */
00003 /*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive   */
00004 /*  license for use of this work by or on behalf of the U.S. Government.  */
00005 /*  Export of this program may require a license from the                 */
00006 /*  United States Government.                                             */
00007 /*------------------------------------------------------------------------*/
00008 
00009 #ifndef stk_util_parallel_ParallelReduce_hpp
00010 #define stk_util_parallel_ParallelReduce_hpp
00011 
00012 #include <cstddef>
00013 #include <iosfwd>
00014 #include <string>
00015 #include <stk_util/parallel/Parallel.hpp>
00016 #include <stk_util/util/SimpleArrayOps.hpp>
00017 
00018 //------------------------------------------------------------------------
00019 
00020 namespace stk {
00021 
00026 // REFACTOR: Consider replacing ReduceSum with Sum, ReduceMax with Max, etc.; this should be possible.
00027 
00031 void all_write_string( ParallelMachine ,
00032                        std::ostream & ,
00033                        const std::string & );
00034 
00036 void all_reduce_sum( ParallelMachine ,
00037                      const double * local , double * global , unsigned count );
00038 
00040 void all_reduce_sum( ParallelMachine ,
00041                      const float * local , float * global , unsigned count );
00042 
00044 void all_reduce_sum( ParallelMachine ,
00045                      const int * local , int * global , unsigned count );
00046 
00048 void all_reduce_sum( ParallelMachine ,
00049                      const size_t * local , size_t * global , unsigned count );
00050 
00052 void all_reduce_bor( ParallelMachine ,
00053                      const unsigned * local ,
00054                      unsigned * global , unsigned count );
00055 
00074 template < class ReduceOp >
00075 void all_reduce( ParallelMachine , const ReduceOp & );
00076 
00079 }
00080 
00081 //----------------------------------------------------------------------
00082 //----------------------------------------------------------------------
00083 
00084 #ifndef DOXYGEN_COMPILE
00085 
00086 namespace stk {
00087 namespace {
00088 // Unnamed namespace so that these classes produce translation-unit-local
00089 // symbols, avoiding linker complaints about multiply-defined symbols.
00090 
// Terminator of a Reduce chain.  It carries no values: its WorkType is an
// empty struct and all three operations (copyin, copyout, op) do nothing,
// which lets Reduce<...> call into its 'Next' member unconditionally.
struct ReduceEnd {

  // Empty buffer type; the last 'm_next' of a Reduce::WorkType chain.
  struct WorkType {};

  // Nothing to combine at the end of the chain.
  static void op( WorkType & , WorkType & )
    {}

  // Nothing to copy out of the buffer.
  void copyout( WorkType & ) const
    {}

  // Nothing to copy into the buffer.
  void copyin( WorkType & ) const
    {}
};
00097 
00098 // Workhorse class for aggregating reduction operations.
00099 
00100 template <class Op, typename T, class Next>
00101 struct Reduce {
00102 
00103   typedef T Type ;
00104   enum { N = Op::N };
00105 
00106   struct WorkType {
00107     typename Next::WorkType m_next ;
00108     Type                    m_value[N];
00109   };
00110 
00111   Next   m_next ;
00112   Type * m_value ;
00113 
00114   // Copy values into buffer:
00115   void copyin( WorkType & w ) const
00116     { Copy<N>( w.m_value , m_value ); m_next.copyin( w.m_next ); }
00117       
00118   // Copy value out from buffer:
00119   void copyout( WorkType & w ) const
00120     { Copy<N>( m_value , w.m_value ); m_next.copyout( w.m_next ); }
00121 
00122   // Reduction function
00123   static void op( WorkType & out , WorkType & in )
00124     { Op( out.m_value , in.m_value ); Next::op( out.m_next , in.m_next ); }
00125 
00126   // Aggregate reduction operations, use '&' for left-to-right evaluation
00127   template<class OpB, typename TB>
00128   Reduce<OpB, TB, Reduce<Op,T,Next> >
00129     operator & ( const Reduce<OpB,TB,ReduceEnd> & rhs )
00130       { return Reduce<OpB, TB, Reduce<Op,T,Next> >( rhs , *this ); }
00131 
00132   // Constructor for aggregation:
00133   Reduce( const Reduce<Op,T, ReduceEnd> & arg_val , const Next & arg_next )
00134     : m_next( arg_next ), m_value( arg_val.m_value ) {}
00135 
00136   // Constructor for aggregate member:
00137   explicit Reduce( Type * arg_value )
00138    : m_next(), m_value( arg_value ) {}
00139 
00140   static void void_op( void*inv, void*inoutv, int*, ParallelDatatype*);
00141 };
00142 
00143 template <class Op, typename T, class Next>
00144 void Reduce<Op,T,Next>::void_op( void*inv, void*inoutv,int*,ParallelDatatype*)
00145 {
00146   op( * reinterpret_cast<WorkType*>( inoutv ) ,
00147       * reinterpret_cast<WorkType*>( inv ) );
00148 }
00149 
00150 }
00151 }
00152 
00153 //----------------------------------------------------------------------
00154 //----------------------------------------------------------------------
00155 
00156 namespace stk {
00157 
00158 template<unsigned N, typename T>
00159 inline
00160 Reduce< Sum<N> , T, ReduceEnd> ReduceSum( T * value )
00161 { return Reduce< Sum<N>, T, ReduceEnd >( value ); }
00162 
00163 template<unsigned N, typename T>
00164 inline
00165 Reduce< Prod<N>, T, ReduceEnd > ReduceProd( T * value )
00166 { return Reduce< Prod<N>, T, ReduceEnd >( value ); }
00167 
00168 template<unsigned N, typename T>
00169 inline
00170 Reduce< Max<N>, T, ReduceEnd> ReduceMax( T * value )
00171 { return Reduce< Max<N>, T, ReduceEnd>( value ); }
00172 
00173 template<unsigned N, typename T>
00174 inline
00175 Reduce< Min<N>, T, ReduceEnd> ReduceMin( T * value )
00176 { return Reduce<Min<N>, T, ReduceEnd>( value ); }
00177 
00178 template<unsigned N, typename T>
00179 inline
00180 Reduce< BitOr<N>, T, ReduceEnd> ReduceBitOr( T * value )
00181 { return Reduce< BitOr<N>, T, ReduceEnd>( value ); }
00182 
00183 template<unsigned N, typename T>
00184 inline
00185 Reduce< BitAnd<N>, T, ReduceEnd> ReduceBitAnd( T * value )
00186 { return Reduce< BitAnd<N>, T, ReduceEnd>( value ); }
00187 
00188 //----------------------------------------------------------------------
00189 // all_reduce( comm , ReduceSum<5>( A ) & ReduceMax<3>( B ) );
00190 
00191 extern "C" {
00192 typedef void (*ParallelReduceOp)
00193   ( void * inv , void * outv , int * , ParallelDatatype * );
00194 }
00195 
00196 void all_reduce( ParallelMachine  arg_comm ,
00197                  ParallelReduceOp arg_op ,
00198                  void           * arg_in ,
00199                  void           * arg_out ,
00200                  unsigned         arg_len );
00201 
00202 namespace {
00203 
00204 template < class ReduceOp >
00205 void all_reduce_driver( ParallelMachine comm , const ReduceOp & op )
00206 {
00207   typedef typename ReduceOp::WorkType WorkType ;
00208 
00209   WorkType inbuf , outbuf ;
00210 
00211   ParallelReduceOp f =
00212     reinterpret_cast<ParallelReduceOp>( & ReduceOp::void_op );
00213   op.copyin( inbuf );
00214   all_reduce( comm , f , & inbuf, & outbuf, sizeof(WorkType) );
00215   op.copyout( outbuf );
00216 }
00217 
00218 }
00219 
00220 template < class ReduceOp >
00221 inline
00222 void all_reduce( ParallelMachine comm , const ReduceOp & op )
00223 { all_reduce_driver<ReduceOp>( comm , op ); }
00224 
00225 }
00226 
00227 #endif /* DOXYGEN_COMPILE */
00228 
00229 //----------------------------------------------------------------------
00230 
00231 #endif
00232 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines