Tpetra Matrix/Vector Services Version of the Day
Tpetra_RTI_detail.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00038 // 
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_RTI_detail_HPP
00043 #define TPETRA_RTI_detail_HPP
00044 
00045 #include <Teuchos_Assert.hpp>
00046 #include <Teuchos_CommHelpers.hpp>
00047 #include <Kokkos_NodeHelpers.hpp>
00048 
00049 #include "Tpetra_Vector.hpp"
00050 
00051 namespace Tpetra {
00052 
00053   namespace RTI {
00054 
00056     namespace detail {
00057 
00059       template <class S>
00060       class StdOpKernel
00061       {
00062         protected:
00063           S _alpha, _beta;
00064           S       * _vec_inout;
00065           const S * _vec_in2;
00066         public:
00067           inline StdOpKernel() : _alpha(ScalarTraits<S>::one()), _beta(ScalarTraits<S>::zero()) {}
00068           inline void setData(S * vec_inout, const S * vec_in2)   { _vec_inout = vec_inout; _vec_in2 = vec_in2; }
00069           inline void setAlphaBeta(const S &alpha, const S &beta) { _alpha = alpha; _beta = beta; }
00070       };
00071 
00073       template <class OP, class S>
00074       class UnaryFunctorAdapter {
00075         protected:
00076           OP   _op;
00077           S  * _vec;
00078         public:
00079           UnaryFunctorAdapter(OP op) : _op(op) {}
00080           inline void setData(S *vec)                    { _vec = vec;             }
00081           inline void execute(int i)                     { _vec[i] = _op(_vec[i]); }
00082       };
00083 
00085       template <class OP, class S1, class S2>
00086       class BinaryFunctorAdapter {
00087         protected:
00088           OP         _op;
00089           S1       * _vec_inout;
00090           const S2 * _vec_in2;
00091         public:
00092           BinaryFunctorAdapter(OP op) : _op(op)    {}
00093           inline void setData(S1 *vec_inout, const S2 *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2;   }
00094           inline void execute(int i)                            { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i]); }
00095       };
00096 
00098       template <class OP, class S>
00099       class BinaryFunctorAdapterWithAlphaBeta : public StdOpKernel<S> {
00100         protected:
00101           OP        _op;
00102           S       * _vec_inout;
00103           const S * _vec_in2;
00104         public:
00105           BinaryFunctorAdapterWithAlphaBeta(OP op) : _op(op)  {}
00106           inline void setData(S *vec_inout, const S *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2;   }
00107           inline void execute(int i) 
00108           { 
00109             S res = _op(_vec_inout[i], _vec_in2[i]); 
00110             _vec_inout[i] = this->_alpha * res + this->_beta * _vec_inout[i];
00111           }
00112       };
00113 
00115       template <class OP, class S1, class S2, class S3>
00116       class TertiaryFunctorAdapter {
00117         protected:
00118           OP         _op;
00119           S1       * _vec_inout;
00120           const S2 * _vec_in2;
00121           const S3 * _vec_in3;
00122         public:
00123           TertiaryFunctorAdapter(OP op) : _op(op)    {}
00124           inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3;  }
00125           inline void execute(int i)                            { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i], _vec_in3[i]); }
00126       };
00127 
00129       template <class Glob, class S1, class S2>
00130       class RTIReductionAdapter {
00131         public:
00132           typedef typename Glob::GenOP                GenOP;
00133           typedef typename Glob::RedOP                RedOP;
00134           typedef typename Glob::IdOP                  IdOP;
00135           typedef typename RedOP::result_type ReductionType;
00136         protected:
00137           GenOP      _genop;
00138           RedOP      _redop;
00139           const S1 * _vec_in1;
00140           const S2 * _vec_in2;
00141         public:
00142           RTIReductionAdapter(Glob glob)                                : _genop(glob.genop), _redop(glob.redop)  {}
00143           inline void setData(const S1 *vec_in1, const S2 *vec_in2)     { _vec_in1 = vec_in1; _vec_in2 = vec_in2;  }
00144           static inline ReductionType identity()                        { return IdOP::identity();                 }
00145           inline ReductionType generate(int i)                          { return _genop(_vec_in1[i], _vec_in2[i]); }
00146           inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b);                     }
00147       };
00148 
00150       template <class Glob, class S1, class S2, class S3>
00151       class RTIReductionAdapter3 {
00152         public:
00153           typedef typename Glob::GenOP                GenOP;
00154           typedef typename Glob::RedOP                RedOP;
00155           typedef typename Glob::IdOP                  IdOP;
00156           typedef typename RedOP::result_type ReductionType;
00157         protected:
00158           GenOP      _genop;
00159           RedOP      _redop;
00160           const S1 * _vec_in1;
00161           const S2 * _vec_in2;
00162           const S3 * _vec_in3;
00163         public:
00164           RTIReductionAdapter3(Glob glob)                                              : _genop(glob.genop), _redop(glob.redop)                     {}
00165           inline void setData(const S1 *vec_in1, const S2 *vec_in2, const S3 *vec_in3) { _vec_in1 = vec_in1; _vec_in2 = vec_in2; _vec_in3 = vec_in3; }
00166           static inline ReductionType identity()                                       { return IdOP::identity();                                    }
00167           inline ReductionType generate(int i)                                         { return _genop(_vec_in1[i], _vec_in2[i], _vec_in3[i]);       }
00168           inline ReductionType reduce(ReductionType a, ReductionType b)                { return _redop(a, b);                                        }
00169       };
00170 
00172       template <class Glob, class S1, class S2>
00173       class RTIPreTransformReductionAdapter {
00174         public:
00175           typedef typename Glob::TOP                    TOP;
00176           typedef typename Glob::GenOP                GenOP;
00177           typedef typename Glob::RedOP                RedOP;
00178           typedef typename Glob::IdOP                  IdOP;
00179           typedef typename RedOP::result_type ReductionType;
00180         protected:
00181           TOP        _top;
00182           GenOP      _genop;
00183           RedOP      _redop;
00184           S1       * _vec_inout;
00185           const S2 * _vec_in2;
00186         public:
00187           RTIPreTransformReductionAdapter(Glob glob)                    : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {}
00188           inline void setData(S1 *vec_inout, const S2 *vec_in2)         { _vec_inout = vec_inout; _vec_in2 = vec_in2; }
00189           static inline ReductionType identity()                        { return IdOP::identity();                    }
00190           inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b);                        }
00191           inline ReductionType generate(int i)
00192           {
00193             _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i] );
00194             return _genop( _vec_inout[i], _vec_in2[i] ); 
00195           }
00196       };
00197 
00199       template <class Glob, class S1, class S2, class S3>
00200       class RTIPreTransformReductionAdapter3 {
00201         public:
00202           typedef typename Glob::TOP                    TOP;
00203           typedef typename Glob::GenOP                GenOP;
00204           typedef typename Glob::RedOP                RedOP;
00205           typedef typename Glob::IdOP                  IdOP;
00206           typedef typename RedOP::result_type ReductionType;
00207         protected:
00208           TOP        _top;
00209           GenOP      _genop;
00210           RedOP      _redop;
00211           S1       * _vec_inout;
00212           const S2 * _vec_in2;
00213           const S3 * _vec_in3;
00214         public:
00215           RTIPreTransformReductionAdapter3(Glob glob)                              : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {}
00216           inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3; }
00217           static inline ReductionType identity()                                   { return IdOP::identity();                    }
00218           inline ReductionType reduce(ReductionType a, ReductionType b)            { return _redop(a, b);                        }
00219           inline ReductionType generate(int i)
00220           {
00221             _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i], _vec_in3[i] );
00222             return _genop( _vec_inout[i], _vec_in2[i], _vec_in3[i] ); 
00223           }
00224       };
00225 
00227       template <class OP>
00228       class TeuchosValueTypeReductionOpAdapter : public Teuchos::ValueTypeReductionOp<int,typename OP::ReductionType> 
00229       {
00230         protected:
00231           mutable OP _op;
00232         public:
00233           typedef typename OP::ReductionType Packet;
00234           TeuchosValueTypeReductionOpAdapter(OP op) : _op(op) {}
00235           void reduce(const int count, const Packet inBuffer[], Packet inoutBuffer []) const 
00236           {
00237             for (int i=0; i != count; ++i) {
00238               inoutBuffer[i] = _op.reduce( inoutBuffer[i], inBuffer[i] );
00239             }
00240           }
00241       };
00242 
00244       template <class S, class LO, class GO, class Node, class OP>
00245       void unary_transform(Vector<S,LO,GO,Node> &vec, OP op) 
00246       {
00247         Kokkos::MultiVector<S,Node> &mv = vec.getLocalMVNonConst();
00248         const RCP<Node> node = mv.getNode();
00249         // ready data
00250         Kokkos::ReadyBufferHelper<Node> rbh(node);
00251         rbh.begin();
00252         S * out_ptr = rbh.addNonConstBuffer(mv.getValuesNonConst());
00253         rbh.end();
00254         op.setData(out_ptr);
00255         const size_t N = mv.getNumRows();
00256         node->template parallel_for(0, N, op);
00257       }
00258 
00260       template <class S1, class S2, class LO, class GO, class Node, class OP>
00261       void binary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00262       {
00263         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00264         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00265         const RCP<Node> node = mv_inout.getNode();
00266         // ready data
00267         Kokkos::ReadyBufferHelper<Node> rbh(node);
00268         rbh.begin();
00269         S1       * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00270         const S2 * in_ptr  = rbh.addConstBuffer(mv_in2.getValues());
00271         rbh.end();
00272         op.setData(out_ptr, in_ptr);
00273         const size_t N = mv_inout.getNumRows();
00274 #ifdef HAVE_TPETRA_DEBUG
00275         TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode(), std::runtime_error, 
00276             "Tpetra::RTI::detail::binary_transform(): multivectors must share the same node.");
00277 #endif
00278         node->template parallel_for(0, N, op);
00279       }
00280 
00282       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00283       void tertiary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00284       {
00285         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00286         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00287         const Kokkos::MultiVector<S3,Node> &mv_in3   = vec_in3.getLocalMV();
00288         const RCP<Node> node = mv_inout.getNode();
00289         // ready data
00290         Kokkos::ReadyBufferHelper<Node> rbh(node);
00291         rbh.begin();
00292         S1       * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00293         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00294         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00295         rbh.end();
00296         op.setData(out_ptr, in_ptr2, in_ptr3);
00297         const size_t N = mv_inout.getNumRows();
00298 #ifdef HAVE_TPETRA_DEBUG
00299         TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode() || mv_in3.getNode() != mv_in2.getNode(), std::runtime_error, 
00300             "Tpetra::RTI::detail::tertiary_transform(): multivectors must share the same node.");
00301 #endif
00302         node->template parallel_for(0, N, op);
00303       }
00304 
00306       template <class S1, class S2, class LO, class GO, class Node, class OP>
00307       typename OP::ReductionType 
00308       reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00309       {
00310         const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV(),
00311                                            &mv_in2 = vec_in2.getLocalMV();
00312         const RCP<Node> node = mv_in1.getNode();
00313         const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm();
00314         // ready data
00315         Kokkos::ReadyBufferHelper<Node> rbh(node);
00316         rbh.begin();
00317         const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
00318         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00319         rbh.end();
00320         op.setData( in_ptr1, in_ptr2 );
00321         const size_t N = mv_in1.getNumRows();
00322 #ifdef HAVE_TPETRA_DEBUG
00323         TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode(), std::runtime_error, 
00324             "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
00325 #endif
00326         // compute local reduction
00327         typename OP::ReductionType gbl_res, lcl_res;
00328         lcl_res = node->template parallel_reduce(0, N, op);
00329         // compute global reduction
00330         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00331         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00332         return gbl_res;
00333       }
00334 
00336       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00337       typename OP::ReductionType 
00338       reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00339       {
00340         const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV();
00341         const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV();
00342         const Kokkos::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV();
00343         const RCP<Node> node = mv_in1.getNode();
00344         const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm();
00345         // ready data
00346         Kokkos::ReadyBufferHelper<Node> rbh(node);
00347         rbh.begin();
00348         const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
00349         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00350         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00351         rbh.end();
00352         op.setData( in_ptr1, in_ptr2, in_ptr3 );
00353         const size_t N = mv_in1.getNumRows();
00354 #ifdef HAVE_TPETRA_DEBUG
00355         TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode() || mv_in2.getNode() != mv_in3.getNode(), std::runtime_error, 
00356             "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
00357 #endif
00358         // compute local reduction
00359         typename OP::ReductionType gbl_res, lcl_res;
00360         lcl_res = node->template parallel_reduce(0, N, op);
00361         // compute global reduction
00362         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00363         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00364         return gbl_res;
00365       }
00366 
00368       template <class S1, class S2, class LO, class GO, class Node, class OP>
00369       typename OP::ReductionType 
00370       transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00371       {
00372         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00373         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00374         const RCP<Node> node = mv_inout.getNode();
00375         const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm();
00376         // ready data
00377         Kokkos::ReadyBufferHelper<Node> rbh(node);
00378         rbh.begin();
00379         S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00380         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00381         rbh.end();
00382         op.setData( in_ptr1, in_ptr2 );
00383         const size_t N = mv_inout.getNumRows();
00384 #ifdef HAVE_TPETRA_DEBUG
00385         TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode(), std::runtime_error, 
00386             "Tpetra::RTI::detail::transform_reduce(): multivectors must share the same node.");
00387 #endif
00388         // compute local reduction
00389         typename OP::ReductionType gbl_res, lcl_res;
00390         lcl_res = node->template parallel_reduce(0, N, op);
00391         // compute global reduction
00392         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00393         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00394         return gbl_res;
00395       }
00396 
00398       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00399       typename OP::ReductionType 
00400       transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00401       {
00402         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00403         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00404         const Kokkos::MultiVector<S3,Node> &mv_in3   = vec_in3.getLocalMV();
00405         const RCP<Node> node = mv_inout.getNode();
00406         const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm();
00407         // ready data
00408         Kokkos::ReadyBufferHelper<Node> rbh(node);
00409         rbh.begin();
00410         S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00411         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00412         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00413         rbh.end();
00414         op.setData( in_ptr1, in_ptr2, in_ptr3 );
00415         const size_t N = mv_inout.getNumRows();
00416 #ifdef HAVE_TPETRA_DEBUG
00417         TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode() && mv_inout.getNode() != mv_in3.getNode(), std::runtime_error, 
00418             "Tpetra::RTI::detail::transform_transform(): multivectors must share the same node.");
00419 #endif
00420         // compute local reduction
00421         typename OP::ReductionType gbl_res, lcl_res;
00422         lcl_res = node->template parallel_reduce(0, N, op);
00423         // compute global reduction
00424         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00425         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00426         return gbl_res;
00427       }
00428 
00429     } // end of namespace Tpetra::RTI::detail
00430 
00431   } // end of namespace Tpetra::RTI
00432 
00433 } // end of namespace Tpetra
00434 
00435 #endif // TPETRA_RTI_detail_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines