|
phdMesh Version of the Day
|
00001 /*------------------------------------------------------------------------*/ 00002 /* phdMesh : Parallel Heterogneous Dynamic unstructured Mesh */ 00003 /* Copyright (2007) Sandia Corporation */ 00004 /* */ 00005 /* Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive */ 00006 /* license for use of this work by or on behalf of the U.S. Government. */ 00007 /* */ 00008 /* This library is free software; you can redistribute it and/or modify */ 00009 /* it under the terms of the GNU Lesser General Public License as */ 00010 /* published by the Free Software Foundation; either version 2.1 of the */ 00011 /* License, or (at your option) any later version. */ 00012 /* */ 00013 /* This library is distributed in the hope that it will be useful, */ 00014 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ 00015 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ 00016 /* Lesser General Public License for more details. */ 00017 /* */ 00018 /* You should have received a copy of the GNU Lesser General Public */ 00019 /* License along with this library; if not, write to the Free Software */ 00020 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ 00021 /* USA */ 00022 /*------------------------------------------------------------------------*/ 00028 #ifndef util_ParallelReduce_hpp 00029 #define util_ParallelReduce_hpp 00030 00031 #include <cstddef> 00032 #include <iosfwd> 00033 #include <string> 00034 #include <util/Parallel.hpp> 00035 #include <util/SimpleArrayOps.hpp> 00036 00037 //------------------------------------------------------------------------ 00038 00039 namespace phdmesh { 00040 00044 void all_write_string( ParallelMachine , 00045 std::ostream & , 00046 const std::string & ); 00047 00048 void all_reduce_sum( ParallelMachine , 00049 const double * local , double * global , unsigned count ); 00050 00051 void all_reduce_sum( ParallelMachine , 00052 const float * local , float * global , unsigned count ); 00053 00054 void all_reduce_sum( ParallelMachine , 00055 const int * local , int * global , unsigned count ); 00056 00057 void all_reduce_bor( ParallelMachine , 00058 const unsigned * local , 00059 unsigned * global , unsigned count ); 00060 00072 } 00073 00074 //---------------------------------------------------------------------- 00075 //---------------------------------------------------------------------- 00076 00077 namespace phdmesh { 00078 00079 extern "C" { 00080 typedef void (*ParallelReduceOp) 00081 ( void * inv , void * outv , int * , ParallelDatatype * ); 00082 } 00083 00084 void all_reduce_internal( ParallelMachine arg_comm , 00085 ParallelReduceOp arg_op , 00086 void * arg_in , 00087 void * arg_out , 00088 unsigned arg_len ); 00089 00090 namespace { 00091 00092 // Blank namespace so that this class produces local symbols, 00093 // avoiding complaints from a linker of multiple-define symbols. 00094 00095 struct ReduceEnd { 00096 struct BufferType {}; 00097 void copyin( BufferType & ) const {} 00098 void copyout( BufferType & ) const {} 00099 static void op( BufferType & , BufferType & ) {} 00100 }; 00101 00102 // Workhorse class for aggregating reduction operations. 00103 00104 template < class Oper , class Next = ReduceEnd > 00105 struct Reduce { 00106 typedef typename Oper::type Type ; 00107 enum { N = Oper::N }; 00108 00109 struct BufferType { 00110 Type m_value[N]; 00111 typename Next::BufferType m_next ; 00112 }; 00113 00114 Next m_next ; 00115 Type * m_ptr ; 00116 00117 Next & set( const Oper & arg ) { m_ptr = arg.ptr ; return m_next ; } 00118 00119 void reduce( ParallelMachine comm ) const ; 00120 00121 void copyin( BufferType & b ) const 00122 { Copy<N>( b.m_value , m_ptr ); m_next.copyin( b.m_next ); } 00123 00124 void copyout( BufferType & b ) const 00125 { Copy<N>( m_ptr , b.m_value ); m_next.copyout( b.m_next ); } 00126 00127 static void op( BufferType & dst , BufferType & src ) 00128 { Oper::op(dst.m_value,src.m_value); Next::op(dst.m_next,src.m_next); } 00129 00130 static void void_op( void*inv, void*inoutv, int*, ParallelDatatype*); 00131 }; 00132 00133 template <class Oper, class Next> 00134 void Reduce<Oper,Next>::void_op( void*inv, void*inoutv,int*,ParallelDatatype*) 00135 { 00136 op( * reinterpret_cast<BufferType*>( inoutv ) , 00137 * reinterpret_cast<BufferType*>( inv ) ); 00138 } 00139 00140 template <class Oper, class Next> 00141 void Reduce<Oper,Next>::reduce( ParallelMachine comm ) const 00142 { 00143 ParallelReduceOp f = reinterpret_cast<ParallelReduceOp>( & void_op ); 00144 BufferType inbuf , outbuf ; 00145 copyin( inbuf ); 00146 all_reduce_internal( comm , f , & inbuf , & outbuf , sizeof(BufferType) ); 00147 copyout( outbuf ); 00148 } 00149 00150 } // namespace 00151 } // namespace phdmesh 00152 00153 //---------------------------------------------------------------------- 00154 //---------------------------------------------------------------------- 00155 00156 namespace phdmesh { 00157 00158 template < class Op1 > 00159 inline 00160 void all_reduce( ParallelMachine comm , const Op1 & op1 ) 00161 { 00162 Reduce< Op1 > work ; 00163 work.set( op1 ); 00164 work.reduce( comm ); 00165 } 00166 00167 template < class Op1 , class Op2 > 00168 inline 00169 void all_reduce( ParallelMachine comm , const Op1 & op1 , 00170 const Op2 & op2 ) 00171 { 00172 Reduce< Op1 , 00173 Reduce< Op2 > > work ; 00174 work.set( op1 ).set( op2 ); 00175 work.reduce( comm ); 00176 } 00177 00178 template < class Op1 , class Op2 , class Op3 > 00179 inline 00180 void all_reduce( ParallelMachine comm , const Op1 & op1 , 00181 const Op2 & op2 , 00182 const Op3 & op3 ) 00183 { 00184 Reduce< Op1 , 00185 Reduce< Op2 , 00186 Reduce< Op3 > > > work ; 00187 work.set( op1 ).set( op2 ).set( op3 ); 00188 work.reduce( comm ); 00189 } 00190 00191 template < class Op1 , class Op2 , class Op3 , class Op4 > 00192 inline 00193 void all_reduce( ParallelMachine comm , const Op1 & op1 , 00194 const Op2 & op2 , 00195 const Op3 & op3 , 00196 const Op4 & op4 ) 00197 { 00198 Reduce< Op1 , 00199 Reduce< Op2 , 00200 Reduce< Op3 , 00201 Reduce< Op4 > > > > work ; 00202 work.set( op1 ).set( op2 ).set( op3 ).set( op4 ); 00203 work.reduce( comm ); 00204 } 00205 00206 template < class Op1 , class Op2 , class Op3 , class Op4 , 00207 class Op5 > 00208 inline 00209 void all_reduce( ParallelMachine comm , const Op1 & op1 , 00210 const Op2 & op2 , 00211 const Op3 & op3 , 00212 const Op4 & op4 , 00213 const Op5 & op5 ) 00214 { 00215 Reduce< Op1 , 00216 Reduce< Op2 , 00217 Reduce< Op3 , 00218 Reduce< Op4 , 00219 Reduce< Op5 > > > > > work ; 00220 work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 ); 00221 work.reduce( comm ); 00222 } 00223 00224 template < class Op1 , class Op2 , class Op3 , class Op4 , 00225 class Op5 , class Op6 > 00226 inline 00227 void all_reduce( ParallelMachine comm , const Op1 & op1 , 00228 const Op2 & op2 , 00229 const Op3 & op3 , 00230 const Op4 & op4 , 00231 const Op5 & op5 , 00232 const Op6 & op6 ) 00233 { 00234 Reduce< Op1 , 00235 Reduce< Op2 , 00236 Reduce< Op3 , 00237 Reduce< Op4 , 00238 Reduce< Op5 , 00239 Reduce< Op6 > > > > > > work ; 00240 work.set( op1 ).set( op2 ).set( op3 ).set( op4 ).set( op5 ).set( op6 ); 00241 work.reduce( comm ); 00242 } 00243 00244 } 00245 00246 //---------------------------------------------------------------------- 00247 00248 #endif 00249
1.7.4