Sierra Toolkit Version of the Day
MPI.cpp
00001 
00010 #include <sstream>
00011 
00012 #include <stk_util/util/FeatureTest.hpp>
00013 #include <stk_util/parallel/MPI.hpp>
00014 
00015 namespace sierra {
00016 namespace MPI {
00017 
00018 template struct Loc<int>;
00019 template struct Loc<double>;
00020 template struct Loc<float>;
00021 
00022 MPI_Datatype
00023 double_complex_type()
00024 {
00025   static MPI_Datatype s_mpi_double_complex;
00026   static bool initialized = false;
00027 
00028   if (!initialized) {
00029     initialized = true;
00030 
00031     MPI_Type_contiguous(2, MPI_DOUBLE, &s_mpi_double_complex);
00032     MPI_Type_commit(&s_mpi_double_complex);
00033   }
00034   return s_mpi_double_complex;
00035 }
00036 
00037 MPI_Datatype
00038 float_complex_type()
00039 {
00040   static MPI_Datatype s_mpi_float_complex;
00041   static bool initialized = false;
00042 
00043   if (!initialized) {
00044     initialized = true;
00045 
00046     MPI_Type_contiguous(2, MPI_FLOAT, &s_mpi_float_complex);
00047     MPI_Type_commit(&s_mpi_float_complex);
00048   }
00049   return s_mpi_float_complex;
00050 }
00051 
00052 
00053 // #ifdef MPI_LONG_LONG_INT
00054 // MPI_Datatype
00055 // long_long_int_int_type()
00056 // {
00057 //   static MPI_Datatype s_mpi_long_long_int_int;
00058 //   static bool initialized = false;
00059 
00060 //   int B[] = {2, 1};
00061 //   MPI_Aint D[] = {0, 8};
00062 //   MPI_Datatype T[] = {MPI_LONG_LONG_INT, MPI_INT};
00063   
00064 //   if (!initialized) {
00065 //     initialized = true;
00066 
00067 //     MPI_Type_struct(2, B, D, T, &s_mpi_long_long_int_int);
00068 //     MPI_Type_commit(&s_mpi_long_long_int_int);
00069 //   }
00070 //   return s_mpi_long_long_int_int;
00071 // }
00072 // #endif
00073 
00074 
00075 MPI_Datatype
00076 double_double_int_type()
00077 {
00078   static MPI_Datatype s_mpi_double_double_int;
00079   static bool initialized = false;
00080 
00081   int B[] = {2, 1};
00082   MPI_Aint D[] = {0, 16};
00083   MPI_Datatype T[] = {MPI_DOUBLE, MPI_INT};
00084   
00085   
00086   if (!initialized) {
00087     initialized = true;
00088 
00089     MPI_Type_struct(2, B, D, T, &s_mpi_double_double_int);
00090     MPI_Type_commit(&s_mpi_double_double_int);
00091   }
00092   return s_mpi_double_double_int;
00093 }
00094 
00095 
00096 namespace {
00097 
00098 extern "C" {
00099   void
00100   mpi_double_complex_sum(
00101     void *    invec,
00102     void *    inoutvec,
00103     int *   len,
00104     MPI_Datatype *  datatype)
00105   {
00106     std::complex<double> *complex_in = static_cast<std::complex<double> *>(invec);
00107     std::complex<double> *complex_inout = static_cast<std::complex<double> *>(inoutvec);
00108 
00109     for (int i = 0; i < *len; ++i)
00110       complex_inout[i] += complex_in[i];
00111   }
00112 } // extern "C"
00113 
00114 } // namespace <unnamed>
00115 
00116 
00117 MPI_Op
00118 double_complex_sum_op()
00119 {
00120   static MPI_Op s_mpi_double_complex_sum;
00121   static bool initialized = false;
00122 
00123   if (!initialized) {
00124     initialized = true;
00125 
00126     MPI_Op_create(mpi_double_complex_sum, true, &s_mpi_double_complex_sum);
00127   }
00128   return s_mpi_double_complex_sum;
00129 }
00130 
00131 namespace {
00132 
00133 const MPI::ReduceSet *s_currentReduceSet = 0;
00134 
00135 extern "C" {
00136   typedef void (*ParallelReduceOp)
00137   (void * inv, void * outv, int *, MPI_Datatype *);
00138 }
00139 
00140 
00141 void
00142 all_reduce(
00143   MPI_Comm    arg_comm,
00144   ParallelReduceOp  arg_op,
00145   void *    arg_in,
00146   void *    arg_out,
00147   unsigned    arg_len)
00148 {
00149   MPI_Op mpi_op = MPI_OP_NULL ;
00150 
00151   MPI_Op_create(arg_op, 0, & mpi_op);
00152 
00153   // The SUN was buggy when combinng an
00154   // MPI_Allreduce with a user defined operator,
00155   // use reduce/broadcast instead.
00156 
00157 #ifdef SIERRA_MPI_ALLREDUCE_USER_FUNCTION_BUG
00158   const int result_reduce = MPI_Reduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,0,arg_comm);
00159   const int result_bcast = MPI_Bcast(arg_out,arg_len,MPI_BYTE,0,arg_comm);
00160 
00161   MPI_Op_free(& mpi_op);
00162 
00163   if (MPI_SUCCESS != result_reduce || MPI_SUCCESS != result_bcast) {
00164     std::ostringstream msg ;
00165     msg << "sierra::MPI::all_reduce FAILED: MPI_Reduce = " << result_reduce
00166   << " MPI_Bcast = " << result_bcast ;
00167     throw std::runtime_error(msg.str());
00168   }
00169 #else
00170   const int result = MPI_Allreduce(arg_in,arg_out,arg_len,MPI_BYTE,mpi_op,arg_comm);
00171 
00172   MPI_Op_free(& mpi_op);
00173 
00174   if (MPI_SUCCESS != result) {
00175     std::ostringstream msg ;
00176     msg << "sierra::MPI::all_reduce FAILED: MPI_Allreduce = " << result;
00177     throw std::runtime_error(msg.str());
00178   }
00179 #endif
00180 }
00181 
00182 struct ReduceCheck : public ReduceInterface
00183 {
00184   ReduceCheck()
00185   {}
00186 
00187   void setSize(unsigned size) {
00188     m_size = size;
00189   }
00190 
00191   virtual void size(void *&inbuf) const {
00192     unsigned *t = align_cast<unsigned>(inbuf);
00193     t += sizeof(unsigned);
00194     inbuf = t;
00195   }
00196 
00197   virtual void copyin(void *&inbuf) const {
00198     unsigned *t = align_cast<unsigned>(inbuf);
00199     *t++ = m_size;
00200     inbuf = t;
00201   }
00202 
00203   virtual void copyout(void *&outbuf) const {
00204     unsigned *t = align_cast<unsigned>(outbuf);
00205 
00206     unsigned size = *t++;
00207     if (m_size != size)
00208       throw std::runtime_error("size mismatch");
00209 
00210     outbuf = t;
00211   }
00212 
00213   virtual void op(void *&inbuf, void *&outbuf) const {
00214     unsigned *tin = align_cast<unsigned>(inbuf);
00215     unsigned *tout = align_cast<unsigned>(outbuf);
00216 
00217     *tout = std::min(*tout, *tin);
00218 
00219     inbuf = ++tin;
00220     outbuf = ++tout;
00221   }
00222 
00223 private:
00224   unsigned  m_size;
00225 };
00226 
00227 } // namespace <unnamed>
00228 
00229 
00230 ReduceSet::ReduceSet()
00231 {
00232   add(new ReduceCheck);
00233 }
00234 
00235 
00236 ReduceSet::~ReduceSet()
00237 {
00238   for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
00239     delete (*it);
00240 }
00241 
00242 
00243 size_t
00244 ReduceSet::size() const {
00245   void *buffer_end = 0;
00246 
00247   for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
00248     (*it)->size(buffer_end);
00249 
00250   ReduceCheck *reduce_check = static_cast<ReduceCheck *>(m_reduceVector.front());
00251   reduce_check->setSize(reinterpret_cast<char *>(buffer_end) - (char *) 0);
00252 
00253   return reinterpret_cast<char *>(buffer_end) - (char *) 0;
00254 }
00255 
00256 void
00257 ReduceSet::copyin(void * const buffer_in) const {
00258   void *inbuf = buffer_in;
00259 
00260   for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
00261     (*it)->copyin(inbuf);
00262 }
00263 
00264 void
00265 ReduceSet::copyout(void * const buffer_out) const {
00266   void *outbuf = buffer_out;
00267 
00268   for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
00269     (*it)->copyout(outbuf);
00270 }
00271 
00272 void
00273 ReduceSet::op(void * const buffer_in, void * const buffer_out) const {
00274   void *inbuf = buffer_in;
00275   void *outbuf = buffer_out;
00276 
00277   for (ReduceVector::const_iterator it = m_reduceVector.begin(); it != m_reduceVector.end(); ++it)
00278     (*it)->op(inbuf, outbuf);
00279 }
00280 
00281 void ReduceSet::void_op(void * inv, void * outv, int *, MPI_Datatype *) {
00282   s_currentReduceSet->op(inv, outv);
00283 }
00284 
00285 
00286 void
00287 ReduceSet::add(
00288   ReduceInterface * reduce_interface)
00289 {
00290   m_reduceVector.push_back(reduce_interface);
00291 }
00292 
00293 
00294 void
00295 AllReduce(
00296   MPI_Comm    comm,
00297   const ReduceSet & reduce_set)
00298 {
00299   size_t size = reduce_set.size();
00300 
00301   if (size) {
00302     char *input_buffer  = new char[size];
00303     char *output_buffer = new char[size];
00304     void *inbuf = (void *) input_buffer;
00305     void *outbuf = (void *) output_buffer;
00306 
00307     s_currentReduceSet = &reduce_set;
00308 
00309     ParallelReduceOp f = reinterpret_cast<ParallelReduceOp>(& ReduceSet::void_op);
00310 
00311     reduce_set.copyin(inbuf);
00312     all_reduce(comm, f, inbuf, outbuf, size);
00313     reduce_set.copyout(outbuf);
00314     delete [] output_buffer;
00315     delete [] input_buffer;
00316   }
00317 }
00318 
00319 
00320 } // namespace MPI
00321 } // namespace sierra
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines