Sierra Toolkit Version of the Day
MPI.hpp
00001 #ifndef STK_UTIL_PARALLEL_MPI_hpp
00002 #define STK_UTIL_PARALLEL_MPI_hpp
00003 
00004 #include <stk_util/stk_config.h>
00005 #if defined( STK_HAS_MPI )
00006 
00007 #include <mpi.h>
00008 #include <vector>
00009 #include <iterator>
00010 #include <stdexcept>
00011 #include <complex>
00012 
00013 namespace sierra {
00014 namespace MPI {
00015 
00016 template<class T>
00017 inline
00018 void
00019   mpi_real_complex_sum(
00020     void *    invec,
00021     void *    inoutvec,
00022     int *   len,
00023     MPI_Datatype *  datatype)
00024   {
00025     std::complex<T> *complex_in = static_cast<std::complex<T> *>(invec);
00026     std::complex<T> *complex_inout = static_cast<std::complex<T> *>(inoutvec);
00027 
00028     for (int i = 0; i < *len; ++i)
00029       complex_inout[i] += complex_in[i];
00030   }
00031 
00036 
00037 
00044 MPI_Datatype float_complex_type();
00045 
00052 MPI_Datatype double_complex_type();
00053 
00060 MPI_Op double_complex_sum_op();
00061 
00068 template<class T>
00069 inline
00070 MPI_Op real_complex_sum_op()
00071 {
00072   static MPI_Op s_mpi_real_complex_sum;
00073   static bool initialized = false;
00074 
00075   if (!initialized) {
00076     initialized = true;
00077 
00078     MPI_Op_create(mpi_real_complex_sum<T>, true, &s_mpi_real_complex_sum);
00079   }
00080   return s_mpi_real_complex_sum;
00081 }
00082 
00088 MPI_Datatype long_long_int_int_type();
00089 
00090 
00096 MPI_Datatype double_double_int_type();
00097 
00098 
00104 template <typename T>
00105 struct Loc
00106 {
00107   Loc()
00108     : m_value(),
00109       m_loc(0)
00110   {}
00111   
00112   Loc(const T &value, int loc)
00113     : m_value(value),
00114       m_loc(loc)
00115   {}
00116   
00117   T   m_value;
00118   int   m_loc;
00119 };
00120 
00121 struct TempLoc
00122 {
00123   TempLoc()
00124     : m_value(),
00125       m_other(),
00126       m_loc(0)
00127   {}
00128   
00129   TempLoc(double value, double other, int loc)
00130     : m_value(value),
00131       m_other(other),
00132       m_loc(loc)
00133   {}
00134   
00135   double        m_value;
00136   double        m_other;
00137   int   m_loc;
00138 };
00139 
00140 
00149 template <typename T>
00150 struct Datatype;
00151 
00152 template <>
00153 struct Datatype<char>
00154 {
00155   static MPI_Datatype type() {
00156     return MPI_CHAR;
00157   }
00158 };
00159 
00160 template <>
00161 struct Datatype<signed char>
00162 {
00163   static MPI_Datatype type() {
00164     return MPI_CHAR;
00165   }
00166 };
00167 
00168 template <>
00169 struct Datatype<unsigned char>
00170 {
00171   static MPI_Datatype type() {
00172     return MPI_BYTE;
00173   }
00174 };
00175 
00176 template <>
00177 struct Datatype<int>
00178 {
00179   static MPI_Datatype type() {
00180     return MPI_INT;
00181   }
00182 };
00183 
00184 template <>
00185 struct Datatype<unsigned int>
00186 {
00187   static MPI_Datatype type() {
00188     return MPI_UNSIGNED;
00189   }
00190 };
00191 
00192 template <>
00193 struct Datatype<short>
00194 {
00195   static MPI_Datatype type() {
00196     return MPI_SHORT;
00197   }
00198 };
00199 
00200 template <>
00201 struct Datatype<unsigned short>
00202 {
00203   static MPI_Datatype type() {
00204     return MPI_UNSIGNED_SHORT;
00205   }
00206 };
00207 
00208 template <>
00209 struct Datatype<long>
00210 {
00211   static MPI_Datatype type() {
00212     return MPI_LONG;
00213   }
00214 };
00215 
00216 template <>
00217 struct Datatype<unsigned long>
00218 {
00219   static MPI_Datatype type() {
00220     return MPI_UNSIGNED_LONG;
00221   }
00222 };
00223 
00224 // #ifdef MPI_LONG_LONG_INT
00225 // template <>
00226 // struct Datatype<long long>
00227 // {
00228 //   static MPI_Datatype type() {
00229 //     return MPI_LONG_LONG_INT;
00230 //   }
00231 // };
00232 // #endif
00233 
00234 // #ifdef MPI_UNSIGNED_LONG_LONG_INT
00235 // template <>
00236 // struct Datatype<unsigned long long>
00237 // {
00238 //   static MPI_Datatype type() {
00239 //     return MPI_UNSIGNED_LONG_LONG_INT;
00240 //   }
00241 // };
00242 // #endif
00243 
00244 template <>
00245 struct Datatype<float>
00246 {
00247   static MPI_Datatype type() {
00248     return MPI_FLOAT;
00249   }
00250 };
00251 
00252 template <>
00253 struct Datatype<double>
00254 {
00255   static MPI_Datatype type() {
00256     return MPI_DOUBLE;
00257   }
00258 };
00259 
00260 
00261 template <>
00262 struct Datatype<std::complex<float> >
00263 {
00264   static MPI_Datatype type() {
00265     return float_complex_type();
00266   }
00267 };
00268 
00269 template <>
00270 struct Datatype<std::complex<double> >
00271 {
00272   static MPI_Datatype type() {
00273     return double_complex_type();
00274   }
00275 };
00276 
00277 
00278 template <>
00279 struct Datatype<Loc<int> >
00280 {
00281   static MPI_Datatype type() {
00282     return MPI_2INT;
00283   }
00284 };
00285 
00286 template <>
00287 struct Datatype<Loc<short> >
00288 {
00289   static MPI_Datatype type() {
00290     return MPI_SHORT_INT;
00291   }
00292 };
00293 
00294 template <>
00295 struct Datatype<Loc<long> >
00296 {
00297   static MPI_Datatype type() {
00298     return MPI_LONG_INT;
00299   }
00300 };
00301 
00302 template <>
00303 struct Datatype<Loc<unsigned long> >
00304 {
00305   static MPI_Datatype type() {
00306     return MPI_LONG_INT;
00307   }
00308 };
00309 
00310 // #ifdef MPI_LONG_LONG_INT
00311 // template <>
00312 // struct Datatype<Loc<long long> >
00313 // {
00314 //   static MPI_Datatype type() {
00315 //     return long_long_int_int_type();
00316 //   }
00317 // };
00318 // #endif
00319 
00320 template <>
00321 struct Datatype<Loc<float> >
00322 {
00323   static MPI_Datatype type() {
00324     return MPI_FLOAT_INT;
00325   }
00326 };
00327 
00328 template <>
00329 struct Datatype<Loc<double> >
00330 {
00331   static MPI_Datatype type() {
00332     return MPI_DOUBLE_INT;
00333   }
00334 };
00335 
00336 template <>
00337 struct Datatype<TempLoc>
00338 {
00339   static MPI_Datatype type() {
00340     return double_double_int_type();
00341   }
00342 };
00343 
00344 
00360 template<class T>
00361 inline void
00362 AllReduce(MPI_Comm mpi_comm, MPI_Op op, T *src_dest, size_t size)
00363 {
00364   std::vector<T> source(src_dest, src_dest + size);
00365 
00366   if (MPI_Allreduce(&source[0], &src_dest[0], (int) size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
00367     throw std::runtime_error("MPI_Allreduce failed");
00368 }
00369 
00385 template<class T>
00386 inline void
00387 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &dest)
00388 {
00389   std::vector<T> source(dest);
00390 
00391   if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
00392     throw std::runtime_error("MPI_Allreduce failed");
00393 }
00394 
00413 template<class T>
00414 inline void
00415 AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest)
00416 {
00417   if (source.size() != dest.size())
00418     throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
00419 
00420   if (MPI_Allreduce(&source[0], &dest[0], (int) dest.size(), Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
00421     throw std::runtime_error("MPI_Allreduce failed");
00422 }
00423 
00424 
00425 template<class T>
00426 inline void
00427 AllGather(MPI_Comm mpi_comm, std::vector<T> &source, std::vector<T> &dest)
00428 {
00429   int nproc = 1;
00430   MPI_Comm_size(mpi_comm,&nproc);
00431   if (source.size()*nproc != dest.size())
00432     throw std::runtime_error("sierra::MPI::AllReduce(MPI_Comm mpi_comm, MPI_Op op, std::vector<T> &source, std::vector<T> &dest) vector lengths not equal");
00433 
00434   if (MPI_Allgather(&source[0], (int)source.size(), Datatype<T>::type(),
00435                     &dest[0],   (int)source.size(), Datatype<T>::type(),
00436                     mpi_comm) != MPI_SUCCESS ){
00437     throw std::runtime_error("MPI_Allreduce failed");
00438   }
00439 }
00440 
00450 template <typename T>
00451 T *align_cast(void *p)
00452 {
00453   enum {alignment = (sizeof(T) > sizeof(double) ? sizeof(double) : sizeof(T))};
00454   enum {mask = alignment - 1};
00455 
00456   char * c = reinterpret_cast<char *>(p);
00457   size_t front_misalign = (c - (char *)0) & mask;
00458   if (front_misalign > 0) {
00459     size_t correction = alignment - front_misalign;
00460     T *q = reinterpret_cast<T *>((c - (char *)0) + correction);
00461     return q;
00462   }
00463 
00464   return reinterpret_cast<T *>(p);
00465 }
00466 
00479 struct ReduceInterface {
00484   ReduceInterface()
00485   {}
00486 
00491   virtual ~ReduceInterface()
00492   {}
00493 
00503   virtual void size(void *&inbuf) const = 0;
00504 
00516   virtual void copyin(void *&inbuf) const = 0;
00517 
00529   virtual void copyout(void *&outbuf) const = 0;
00530 
00547   virtual void op(void *&inbuf, void *&outbuf) const = 0;
00548 };
00549 
00550 
00563 template<class Op, class LocalIt, class GlobalIt = LocalIt>
00564 struct Reduce : public ReduceInterface
00565 {
00566   typedef typename std::iterator_traits<LocalIt>::value_type value_type;
00567   typedef typename std::iterator_traits<LocalIt>::difference_type difference_type;
00568 
00569   Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end)
00570     : m_localBegin(local_begin),
00571       m_localEnd(local_end),
00572       m_globalBegin(global_begin),
00573       m_globalEnd(global_end),
00574       m_length(local_end - local_begin)
00575   {
00576     if (global_end - global_begin != m_length)
00577       throw std::runtime_error("sierra::MPI::Reduce::Reduce(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) local and global lengths not equal");
00578   }
00579 
00580   virtual ~Reduce()
00581   {}
00582 
00583   virtual void size(void *&inbuf) const {
00584     value_type *t = align_cast<value_type>(inbuf);
00585     t += m_length;
00586     inbuf = t;
00587   }
00588 
00589   virtual void copyin(void *&inbuf) const {
00590     value_type *t = align_cast<value_type>(inbuf);
00591     for (LocalIt it = m_localBegin; it != m_localEnd; ++it)
00592       *t++ = (*it);
00593     inbuf = t;
00594   }
00595 
00596   virtual void copyout(void *&outbuf) const {
00597     value_type *t = align_cast<value_type>(outbuf);
00598     for (GlobalIt it = m_globalBegin; it != m_globalEnd; ++it)
00599       (*it) = *t++;
00600     outbuf = t;
00601   }
00602 
00603   virtual void op(void *&inbuf, void *&outbuf) const {
00604     value_type *tin = align_cast<value_type>(inbuf);
00605     value_type *tout = align_cast<value_type>(outbuf);
00606 
00607     for (size_t i = m_length; i; --i)
00608       Op(tout++, tin++);
00609     inbuf = tin;
00610     outbuf = tout;
00611   }
00612 
00613   LocalIt     m_localBegin;
00614   LocalIt     m_localEnd;
00615   GlobalIt      m_globalBegin;
00616   GlobalIt      m_globalEnd;
00617   difference_type   m_length;
00618 };
00619 
00620 
00625 class ReduceSet
00626 {
00627 public:
00628   typedef std::vector<ReduceInterface *> ReduceVector;
00629 
00630   ReduceSet();
00631 
00632   virtual ~ReduceSet();
00633 
00634   void add(ReduceInterface *reduce_interface);
00635 
00636   size_t size() const;
00637 
00638   void copyin(void * const buffer_in) const;
00639 
00640   void copyout(void * const buffer_out) const;
00641 
00642   void op(void * const buffer_in, void * const buffer_out) const;
00643 
00644   static void void_op(void * inv, void * outv, int *n, MPI_Datatype *datatype);
00645 
00646 private:
00647   ReduceVector    m_reduceVector;
00648 };
00649 
00658 void AllReduce(MPI_Comm comm, const ReduceSet &reduce_set);
00659 
00664 struct Sum
00665 {
00666   template <typename T>
00667   inline Sum(T * dest, const T *source) {
00668     *dest += *source;
00669   }
00670 };
00671 
00676 struct Prod
00677 {
00678   template <typename T>
00679   inline Prod(T * dest, const T *source) {
00680     *dest *= *source;
00681   }
00682 };
00683 
00688 struct Min
00689 {
00690   template <typename T>
00691   inline Min(T * dest, const T *source) {
00692     *dest = std::min(*dest, *source);
00693   }
00694 };
00695 
00700 struct Max
00701 {
00702   template <typename T>
00703   inline Max(T * dest, const T *source) {
00704     *dest = std::max(*dest, *source);
00705   }
00706 };
00707 
00712 struct MinLoc
00713 {
00714   template <typename T>
00715   inline MinLoc(Loc<T> * dest, const Loc<T> *source) {
00716     if (source->m_value < dest->m_value) {
00717       dest->m_value = source->m_value;
00718       dest->m_loc = source->m_loc;
00719     }
00720     else if (source->m_value == dest->m_value)
00721       dest->m_loc = std::min(dest->m_loc, source->m_loc);
00722   }
00723 };
00724 
00729 struct MaxLoc
00730 {
00731   template <typename T>
00732   inline MaxLoc(Loc<T> * dest, const Loc<T> *source) {
00733     if (source->m_value > dest->m_value) {
00734       dest->m_value = source->m_value;
00735       dest->m_loc = source->m_loc;
00736     }
00737     else if (source->m_value == dest->m_value)
00738       dest->m_loc = std::min(dest->m_loc, source->m_loc);
00739   }
00740 };
00741 
00742 
00743 struct MaxTempLoc
00744 {
00745   inline MaxTempLoc(TempLoc * dest, const TempLoc *source) {
00746     if (source->m_value > dest->m_value) {
00747       dest->m_value = source->m_value;
00748       dest->m_other = source->m_other;
00749       dest->m_loc = source->m_loc;
00750     }
00751     else if (source->m_value == dest->m_value) {
00752       if (dest->m_loc > source->m_loc) {
00753         dest->m_other = source->m_other;
00754         dest->m_loc = source->m_loc;
00755       }
00756     }
00757   }
00758 };
00759 
00760 struct MinTempLoc
00761 {
00762   inline MinTempLoc(TempLoc * dest, const TempLoc *source) {
00763     if (source->m_value < dest->m_value) {
00764       dest->m_value = source->m_value;
00765       dest->m_other = source->m_other;
00766       dest->m_loc = source->m_loc;
00767     }
00768     else if (source->m_value == dest->m_value) {
00769       if (dest->m_loc > source->m_loc) {
00770         dest->m_other = source->m_other;
00771         dest->m_loc = source->m_loc;
00772       }
00773     }
00774   }
00775 };
00776 
00788 template<typename T>
00789 Reduce<Sum, T *> *ReduceSum(T *t, T *u, size_t length) {
00790   return new Reduce<Sum, T *>(t, t + length, u, u + length);
00791 }
00792 
00804 template<typename T>
00805 Reduce<Prod, T *> *ReduceProd(T *t, T *u, size_t length) {
00806   return new Reduce<Prod, T *>(t, t + length, u, u + length);
00807 }
00808 
00820 template<typename T>
00821 Reduce<Max, T *> *ReduceMax(T *t, T *u, size_t length) {
00822   return new Reduce<Max, T *>(t, t + length, u, u + length);
00823 }
00824 
00836 template<typename T>
00837 Reduce<Min, T *> *ReduceMin(T *t, T *u, size_t length) {
00838   return new Reduce<Min, T *>(t, t + length, u, u + length);
00839 }
00840 
00841 
00851 template<typename T>
00852 Reduce<Sum, T *> *ReduceSum(T &t, T &u) {
00853   return new Reduce<Sum, T *>(&t, &t + 1, &u, &u + 1);
00854 }
00855 
00865 template<typename T>
00866 Reduce<Prod, T *> *ReduceProd(T &t, T &u) {
00867   return new Reduce<Prod, T *>(&t, &t + 1, &u, &u + 1);
00868 }
00869 
00879 template<typename T>
00880 Reduce<Max, T *> *ReduceMax(T &t, T &u) {
00881   return new Reduce<Max, T *>(&t, &t + 1, &u, &u + 1);
00882 }
00883 
00893 template<typename T>
00894 Reduce<Min, T *> *ReduceMin(T &t, T &u) {
00895   return new Reduce<Min, T *>(&t, &t + 1, &u, &u + 1);
00896 }
00897 
00898 
00912 template<class LocalIt, class GlobalIt>
00913 Reduce<Sum, LocalIt, GlobalIt> *ReduceSum(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
00914   return new Reduce<Sum, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end);
00915 }
00916 
00930 template<class LocalIt, class GlobalIt>
00931 Reduce<Prod, LocalIt, GlobalIt> *ReduceProd(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
00932   return new Reduce<Prod, LocalIt, GlobalIt>(local_begin, local_end, global_begin, global_end);
00933 }
00934 
00948 template<typename T, class LocalIt, class GlobalIt>
00949 Reduce<Min, LocalIt, GlobalIt> *ReduceMin(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
00950   return new Reduce<Min, LocalIt>(local_begin, local_end, global_begin, global_end);
00951 }
00952 
00966 template<typename T, class LocalIt, class GlobalIt>
00967 Reduce<Max, LocalIt, GlobalIt> *ReduceMax(LocalIt local_begin, LocalIt local_end, GlobalIt global_begin, GlobalIt global_end) {
00968   return new Reduce<Max, LocalIt>(local_begin, local_end, global_begin, global_end);
00969 }
00970 
00981 template<class T, class U>
00982 inline void
00983 AllReduceCollected(MPI_Comm mpi_comm, MPI_Op op, U collector)
00984 {
00985   std::vector<T> source;
00986 
00987   std::back_insert_iterator<std::vector<T> > source_inserter(source);
00988   collector.gather(op, source_inserter);
00989 
00990   int size = source.size();
00991 
00992 #ifdef SIERRA_DEBUG
00993   //
00994   //  Check that the array lengths being reduces are all the same
00995   //
00996   int num_proc;
00997   int my_proc;
00998   MPI_Comm_size(mpi_comm, &num_proc);
00999   MPI_Comm_rank(mpi_comm, &my_proc);
01000 
01001 
01002   std::vector<int> local_array_len(num_proc, 0);
01003   local_array_len[my_proc] == size;
01004   std::vector<int> global_array_len(num_proc, 0);
01005 
01006   MPI_Allreduce(&local_array_len[0], &global_array_len[0], num_proc, MPI_INT, MPI_SUM, mpi_comm);
01007 
01008   for(unsigned i = 0; i < num_proc; ++i) {
01009     if(global_array_len[i] != size) {
01010       throw std::runtime_error("Slib_MPI.h::AllReduceCollected, not all processors have the same length array");
01011     }
01012   }
01013 #endif
01014 
01015   if (source.empty()) return;
01016   std::vector<T> dest(size);
01017 
01018   if (MPI_Allreduce(&source[0], &dest[0], size, Datatype<T>::type(), op, mpi_comm) != MPI_SUCCESS )
01019     throw std::runtime_error("MPI_Allreduce failed");
01020 
01021   typename std::vector<T>::iterator dest_getter = dest.begin();
01022   collector.scatter(op, dest_getter);
01023 }
01024 
01028 
01029 } // namespace MPI
01030 } // namespace sierra
01031 
01032 #endif // if defined( STK_HAS_MPI )
01033 #endif // STK_UTIL_PARALLEL_MPI_hpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines