phdMesh Version of the Day
ParallelReduce.hpp
00001 /*------------------------------------------------------------------------*/
00002 /*      phdMesh : Parallel Heterogeneous Dynamic unstructured Mesh        */
00003 /*                Copyright (2007) Sandia Corporation                     */
00004 /*                                                                        */
00005 /*  Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive   */
00006 /*  license for use of this work by or on behalf of the U.S. Government.  */
00007 /*                                                                        */
00008 /*  This library is free software; you can redistribute it and/or modify  */
00009 /*  it under the terms of the GNU Lesser General Public License as        */
00010 /*  published by the Free Software Foundation; either version 2.1 of the  */
00011 /*  License, or (at your option) any later version.                       */
00012 /*                                                                        */
00013 /*  This library is distributed in the hope that it will be useful,       */
00014 /*  but WITHOUT ANY WARRANTY; without even the implied warranty of        */
00015 /*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     */
00016 /*  Lesser General Public License for more details.                       */
00017 /*                                                                        */
00018 /*  You should have received a copy of the GNU Lesser General Public      */
00019 /*  License along with this library; if not, write to the Free Software   */
00020 /*  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307   */
00021 /*  USA                                                                   */
00022 /*------------------------------------------------------------------------*/
00028 #ifndef util_ParallelReduce_hpp
00029 #define util_ParallelReduce_hpp
00030 
00031 #include <cstddef>
00032 #include <iosfwd>
00033 #include <string>
00034 #include <util/Parallel.hpp>
00035 #include <util/SimpleArrayOps.hpp>
00036 
00037 //------------------------------------------------------------------------
00038 
00039 namespace phdmesh {
00040 
00044 void all_write_string( ParallelMachine ,
00045                        std::ostream & ,
00046                        const std::string & );
00047 
00048 void all_reduce_sum( ParallelMachine ,
00049                      const double * local , double * global , unsigned count );
00050 
00051 void all_reduce_sum( ParallelMachine ,
00052                      const float * local , float * global , unsigned count );
00053 
00054 void all_reduce_sum( ParallelMachine ,
00055                      const int * local , int * global , unsigned count );
00056 
00057 void all_reduce_bor( ParallelMachine ,
00058                      const unsigned * local ,
00059                      unsigned * global , unsigned count );
00060 
00072 }
00073 
00074 //----------------------------------------------------------------------
00075 //----------------------------------------------------------------------
00076 
00077 namespace phdmesh {
00078 
00079 extern "C" {
00080 typedef void (*ParallelReduceOp)
00081   ( void * inv , void * outv , int * , ParallelDatatype * );
00082 }
00083 
00084 void all_reduce_internal( ParallelMachine  arg_comm ,
00085                           ParallelReduceOp arg_op ,
00086                           void           * arg_in ,
00087                           void           * arg_out ,
00088                           unsigned         arg_len );
00089 
00090 namespace {
00091 
00092 // Blank namespace so that this class produces local symbols,
00093 // avoiding complaints from a linker of multiple-define symbols.
00094 
/** Terminator of a Reduce chain.
 *
 *  Supplies the same members a Reduce link exposes (BufferType, copyin,
 *  copyout, op), all of them empty, so that the last real link can
 *  forward to its 'Next' without special-casing the end of the chain.
 */
struct ReduceEnd {

  /** Empty buffer: the terminator carries no values. */
  struct BufferType {};

  /** Nothing to combine. */
  static void op( BufferType & , BufferType & )
    {
    }

  /** Nothing to copy into the buffer. */
  void copyin( BufferType & ) const
    {
    }

  /** Nothing to copy out of the buffer. */
  void copyout( BufferType & ) const
    {
    }
};
00101 
00102 // Workhorse class for aggregating reduction operations.
00103 
00104 template < class Oper , class Next = ReduceEnd >
00105 struct Reduce {
00106   typedef typename Oper::type Type ;
00107   enum { N = Oper::N };
00108 
00109   struct BufferType {
00110     Type                      m_value[N];
00111     typename Next::BufferType m_next ;
00112   };
00113 
00114   Next   m_next ;
00115   Type * m_ptr ;
00116 
00117   Next & set( const Oper & arg ) { m_ptr = arg.ptr ; return m_next ; }
00118 
00119   void reduce( ParallelMachine comm ) const ;
00120 
00121   void copyin( BufferType & b ) const
00122     { Copy<N>( b.m_value , m_ptr ); m_next.copyin( b.m_next ); }
00123 
00124   void copyout( BufferType & b ) const
00125     { Copy<N>( m_ptr , b.m_value ); m_next.copyout( b.m_next ); }
00126 
00127   static void op( BufferType & dst , BufferType & src )
00128     { Oper::op(dst.m_value,src.m_value); Next::op(dst.m_next,src.m_next); }
00129 
00130   static void void_op( void*inv, void*inoutv, int*, ParallelDatatype*);
00131 };
00132 
00133 template <class Oper, class Next>
00134 void Reduce<Oper,Next>::void_op( void*inv, void*inoutv,int*,ParallelDatatype*)
00135 {
00136   op( * reinterpret_cast<BufferType*>( inoutv ) ,
00137       * reinterpret_cast<BufferType*>( inv ) );
00138 }
00139 
00140 template <class Oper, class Next>
00141 void Reduce<Oper,Next>::reduce( ParallelMachine comm ) const
00142 {
00143   ParallelReduceOp f = reinterpret_cast<ParallelReduceOp>( & void_op );
00144   BufferType inbuf , outbuf ;
00145   copyin( inbuf );
00146   all_reduce_internal( comm , f , & inbuf , & outbuf , sizeof(BufferType) );
00147   copyout( outbuf );
00148 }
00149 
00150 } // namespace
00151 } // namespace phdmesh
00152 
00153 //----------------------------------------------------------------------
00154 //----------------------------------------------------------------------
00155 
00156 namespace phdmesh {
00157 
00158 template < class Op1 >
00159 inline
00160 void all_reduce( ParallelMachine comm , const Op1 & op1 )
00161 {
00162   Reduce< Op1 > work ;
00163   work.set( op1 );
00164   work.reduce( comm );
00165 }
00166 
00167 template < class Op1 , class Op2 >
00168 inline
00169 void all_reduce( ParallelMachine comm , const Op1 & op1 ,
00170                                         const Op2 & op2 )
00171 {
00172   Reduce< Op1 ,
00173   Reduce< Op2 > > work ;
00174   work.set( op1 ).set( op2 );
00175   work.reduce( comm );
00176 }
00177 
00178 template < class Op1 , class Op2 , class Op3 >
00179 inline
00180 void all_reduce( ParallelMachine comm , const Op1 & op1 ,
00181                                         const Op2 & op2 ,
00182                                         const Op3 & op3 )
00183 {
00184   Reduce< Op1 ,
00185   Reduce< Op2 ,
00186   Reduce< Op3 > > > work ;
00187   work.set( op1 ).set( op2 ).set( op3 );
00188   work.reduce( comm );
00189 }
00190 
00191 template < class Op1 , class Op2 , class Op3 , class Op4 >
00192 inline
00193 void all_reduce( ParallelMachine comm , const Op1 & op1 ,
00194                                         const Op2 & op2 ,
00195                                         const Op3 & op3 ,
00196                                         const Op4 & op4 )
00197 {
00198   Reduce< Op1 ,
00199   Reduce< Op2 ,
00200   Reduce< Op3 ,
00201   Reduce< Op4 > > > > work ;
00202   work.set( op1 ).set( op2 ).set( op3 ).set( op4 );
00203   work.reduce( comm );
00204 }
00205 
00206 template < class Op1 , class Op2 , class Op3 , class Op4 ,
00207            class Op5 >
00208 inline
00209 void all_reduce( ParallelMachine comm , const Op1 & op1 ,
00210                                         const Op2 & op2 ,
00211                                         const Op3 & op3 ,
00212                                         const Op4 & op4 ,
00213                                         const Op5 & op5 )
00214 {
00215   Reduce< Op1 ,
00216   Reduce< Op2 ,
00217   Reduce< Op3 ,
00218   Reduce< Op4 ,
00219   Reduce< Op5 > > > > > work ;
00220   work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 );
00221   work.reduce( comm );
00222 }
00223 
00224 template < class Op1 , class Op2 , class Op3 , class Op4 ,
00225            class Op5 , class Op6 >
00226 inline
00227 void all_reduce( ParallelMachine comm , const Op1 & op1 ,
00228                                         const Op2 & op2 ,
00229                                         const Op3 & op3 ,
00230                                         const Op4 & op4 ,
00231                                         const Op5 & op5 ,
00232                                         const Op6 & op6 )
00233 {
00234   Reduce< Op1 ,
00235   Reduce< Op2 ,
00236   Reduce< Op3 ,
00237   Reduce< Op4 ,
00238   Reduce< Op5 ,
00239   Reduce< Op6 > > > > > > work ;
00240   work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 ).set( op6 );
00241   work.reduce( comm );
00242 }
00243 
00244 }
00245 
00246 //----------------------------------------------------------------------
00247 
00248 #endif
00249 
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator