Kokkos Node API and Local Linear Algebra Kernels Version of the Day
Kokkos_TPINode.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //          Kokkos: Node API and Parallel Node Kernels
00005 //              Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 //@HEADER
00041 
00042 #ifndef KOKKOS_TPINODE_HPP_
00043 #define KOKKOS_TPINODE_HPP_
00044 
00045 #include "Kokkos_StandardNodeMemoryModel.hpp"
00046 #include "Kokkos_NodeHelpers.hpp"
00047 #include <TPI.h>
00048 
// Forward declaration only: this header refers to Teuchos::ParameterList by
// reference and therefore does not need the full class definition.
namespace Teuchos { class ParameterList; }
00053 
00054 namespace Kokkos {
00055 
00056   template <class WDP>
00057   struct WDPPlusRange {
00058     WDPPlusRange(int Beg, int End, WDP Wdp) : wdp(Wdp), beg(Beg), end(End){}
00059     WDP wdp;
00060     int beg, end;
00061   };
00062 
00063   inline
00064   void tpi_work_span(TPI_Work* work, int beg, int end, int& ibeg, int& iend)
00065   {
00066     const int chunk = ( end - beg + work->count - 1 ) / work->count ;
00067   
00068     iend = chunk * ( work->rank + 1 ) + beg;
00069     ibeg = chunk * ( work->rank ) + beg;
00070   
00071     if ( end < iend ) { iend = end; }
00072   }
00073 
00074   template<class WDP>
00075   void tpi_execute(TPI_Work * work)
00076   {
00077     // get work/data pair
00078     const WDPPlusRange<WDP>* const_wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00079     WDPPlusRange<WDP>* wdp_wrapper = const_cast<WDPPlusRange<WDP>*>(const_wdp_wrapper);
00080     WDP wdp = wdp_wrapper->wdp;
00081     int beg = wdp_wrapper->beg, end = wdp_wrapper->end;
00082     int ibeg, iend;
00083     // determine my share of the work
00084     tpi_work_span(work, beg, end, ibeg, iend);
00085     // do my share of the work
00086     for (int i=ibeg; i<iend; ++i) {
00087       wdp.execute(i);
00088     }
00089   }
00090 
00091   template<class WDP>
00092   void tpi_reduction_work(TPI_Work * work)
00093   {
00094     const WDPPlusRange<WDP>* wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00095     int beg = wdp_wrapper->beg, end = wdp_wrapper->end;
00096     WDP wdp = wdp_wrapper->wdp;
00097     int ibeg, iend;
00098     tpi_work_span(work, beg, end, ibeg, iend);
00099   
00100     typedef typename WDP::ReductionType ReductionType;
00101     ReductionType tmpi;
00102     ReductionType &res = *(static_cast<ReductionType*>(work->reduce));
00103   
00104     for (int i=ibeg; i<iend; ++i) {
00105       tmpi = wdp.generate(i);
00106       res = wdp.reduce(res, tmpi);
00107     }
00108   }
00109 
00110   template<class WDP>
00111   void tpi_reduction_join(TPI_Work * work, const void* src)
00112   {
00113     typedef typename WDP::ReductionType ReductionType;
00114   
00115     const WDPPlusRange<WDP>* wdp_wrapper = static_cast<const WDPPlusRange<WDP>*>(work->info);
00116     WDP wdp = wdp_wrapper->wdp;
00117   
00118     ReductionType& work_reduce = *(static_cast<ReductionType*>(work->reduce));
00119     const ReductionType& src_reduce  = *(static_cast<const ReductionType*>(src));
00120   
00121     work_reduce = wdp.reduce(work_reduce, src_reduce);
00122   }
00123 
00124   template<class WDP>
00125   void tpi_reduction_init(TPI_Work * work)
00126   {
00127     typedef typename WDP::ReductionType ReductionType;
00128     *(static_cast<ReductionType*>(work->reduce)) = WDP::identity();
00129   }
00130 
00134   class TPINode : public StandardNodeMemoryModel {
00135     public:
00136 
00144       TPINode(Teuchos::ParameterList &plist);
00145 
00149       void init(int numThreads);
00150 
00154       ~TPINode();
00155 
00157       template <class WDP>
00158       static void parallel_for(int beg, int end, WDP wd) {
00159         WDPPlusRange<WDP> wdp_plus(beg,end,wd);
00160         TPI_Run_threads(tpi_execute<WDP>, &wdp_plus, 0 );
00161       }
00162 
00164       template <class WDP>
00165       static typename WDP::ReductionType 
00166       parallel_reduce(int beg, int end, WDP wd) {
00167         typedef typename WDP::ReductionType ReductionType;
00168         ReductionType result = WDP::identity();
00169         WDPPlusRange<WDP> wdp_plus(beg,end,wd);
00170         TPI_Run_threads_reduce(tpi_reduction_work<WDP>, &wdp_plus,
00171                                tpi_reduction_join<WDP>,
00172                                tpi_reduction_init<WDP>, sizeof(result), &result);
00173         return result;
00174       }
00175 
00177       inline void sync() const {};
00178 
00179     private:
00180       int curNumThreads_;
00181   };
00182 
00183   template <> class ArrayOfViewsHelper<TPINode> : public ArrayOfViewsHelperTrivialImpl<TPINode> {};
00184 
00185 } // end namespace Kokkos
00186 
00187 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends