Tpetra Matrix/Vector Services Version of the Day
Tpetra_RTI_detail.hpp
00001 //@HEADER
00002 // ************************************************************************
00003 // 
00004 //               Tpetra: Templated Linear Algebra Services Package 
00005 //                 Copyright (2008) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more detail.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ************************************************************************
00027 //@HEADER
00028 
00029 #ifndef TPETRA_RTI_detail_HPP
00030 #define TPETRA_RTI_detail_HPP
00031 
00032 #include <Teuchos_TestForException.hpp>
00033 #include <Teuchos_CommHelpers.hpp>
00034 #include <Kokkos_NodeHelpers.hpp>
00035 
00036 #include "Tpetra_Vector.hpp"
00037 
00038 namespace Tpetra {
00039 
00040   namespace RTI {
00041 
00043     namespace detail {
00044 
00046       template <class S>
00047       class StdOpKernel
00048       {
00049         protected:
00050           S _alpha, _beta;
00051           S       * _vec_inout;
00052           const S * _vec_in2;
00053         public:
00054           inline StdOpKernel() : _alpha(ScalarTraits<S>::one()), _beta(ScalarTraits<S>::zero()) {}
00055           inline void setData(S * vec_inout, const S * vec_in2)   { _vec_inout = vec_inout; _vec_in2 = vec_in2; }
00056           inline void setAlphaBeta(const S &alpha, const S &beta) { _alpha = alpha; _beta = beta; }
00057       };
00058 
00060       template <class OP, class S>
00061       class UnaryFunctorAdapter {
00062         protected:
00063           OP   _op;
00064           S  * _vec;
00065         public:
00066           UnaryFunctorAdapter(OP op) : _op(op) {}
00067           inline void setData(S *vec)                    { _vec = vec;             }
00068           inline void execute(int i)                     { _vec[i] = _op(_vec[i]); }
00069       };
00070 
00072       template <class OP, class S1, class S2>
00073       class BinaryFunctorAdapter {
00074         protected:
00075           OP         _op;
00076           S1       * _vec_inout;
00077           const S2 * _vec_in2;
00078         public:
00079           BinaryFunctorAdapter(OP op) : _op(op)    {}
00080           inline void setData(S1 *vec_inout, const S2 *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2;   }
00081           inline void execute(int i)                            { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i]); }
00082       };
00083 
00085       template <class OP, class S>
00086       class BinaryFunctorAdapterWithAlphaBeta : public StdOpKernel<S> {
00087         protected:
00088           OP        _op;
00089           S       * _vec_inout;
00090           const S * _vec_in2;
00091         public:
00092           BinaryFunctorAdapterWithAlphaBeta(OP op) : _op(op)  {}
00093           inline void setData(S *vec_inout, const S *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2;   }
00094           inline void execute(int i) 
00095           { 
00096             S res = _op(_vec_inout[i], _vec_in2[i]); 
00097             _vec_inout[i] = this->_alpha * res + this->_beta * _vec_inout[i];
00098           }
00099       };
00100 
00102       template <class OP, class S1, class S2, class S3>
00103       class TertiaryFunctorAdapter {
00104         protected:
00105           OP         _op;
00106           S1       * _vec_inout;
00107           const S2 * _vec_in2;
00108           const S3 * _vec_in3;
00109         public:
00110           TertiaryFunctorAdapter(OP op) : _op(op)    {}
00111           inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3;  }
00112           inline void execute(int i)                            { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i], _vec_in3[i]); }
00113       };
00114 
00116       template <class Glob, class S1, class S2>
00117       class RTIReductionAdapter {
00118         public:
00119           typedef typename Glob::GenOP                GenOP;
00120           typedef typename Glob::RedOP                RedOP;
00121           typedef typename Glob::IdOP                  IdOP;
00122           typedef typename RedOP::result_type ReductionType;
00123         protected:
00124           GenOP      _genop;
00125           RedOP      _redop;
00126           const S1 * _vec_in1;
00127           const S2 * _vec_in2;
00128         public:
00129           RTIReductionAdapter(Glob glob)                                : _genop(glob.genop), _redop(glob.redop)  {}
00130           inline void setData(const S1 *vec_in1, const S2 *vec_in2)     { _vec_in1 = vec_in1; _vec_in2 = vec_in2;  }
00131           static inline ReductionType identity()                        { return IdOP::identity();                 }
00132           inline ReductionType generate(int i)                          { return _genop(_vec_in1[i], _vec_in2[i]); }
00133           inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b);                     }
00134       };
00135 
00137       template <class Glob, class S1, class S2, class S3>
00138       class RTIReductionAdapter3 {
00139         public:
00140           typedef typename Glob::GenOP                GenOP;
00141           typedef typename Glob::RedOP                RedOP;
00142           typedef typename Glob::IdOP                  IdOP;
00143           typedef typename RedOP::result_type ReductionType;
00144         protected:
00145           GenOP      _genop;
00146           RedOP      _redop;
00147           const S1 * _vec_in1;
00148           const S2 * _vec_in2;
00149           const S3 * _vec_in3;
00150         public:
00151           RTIReductionAdapter3(Glob glob)                                              : _genop(glob.genop), _redop(glob.redop)                     {}
00152           inline void setData(const S1 *vec_in1, const S2 *vec_in2, const S3 *vec_in3) { _vec_in1 = vec_in1; _vec_in2 = vec_in2; _vec_in3 = vec_in3; }
00153           static inline ReductionType identity()                                       { return IdOP::identity();                                    }
00154           inline ReductionType generate(int i)                                         { return _genop(_vec_in1[i], _vec_in2[i], _vec_in3[i]);       }
00155           inline ReductionType reduce(ReductionType a, ReductionType b)                { return _redop(a, b);                                        }
00156       };
00157 
00159       template <class Glob, class S1, class S2>
00160       class RTIPreTransformReductionAdapter {
00161         public:
00162           typedef typename Glob::TOP                    TOP;
00163           typedef typename Glob::GenOP                GenOP;
00164           typedef typename Glob::RedOP                RedOP;
00165           typedef typename Glob::IdOP                  IdOP;
00166           typedef typename RedOP::result_type ReductionType;
00167         protected:
00168           TOP        _top;
00169           GenOP      _genop;
00170           RedOP      _redop;
00171           S1       * _vec_inout;
00172           const S2 * _vec_in2;
00173         public:
00174           RTIPreTransformReductionAdapter(Glob glob)                    : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {}
00175           inline void setData(S1 *vec_inout, const S2 *vec_in2)         { _vec_inout = vec_inout; _vec_in2 = vec_in2; }
00176           static inline ReductionType identity()                        { return IdOP::identity();                    }
00177           inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b);                        }
00178           inline ReductionType generate(int i)
00179           {
00180             _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i] );
00181             return _genop( _vec_inout[i], _vec_in2[i] ); 
00182           }
00183       };
00184 
00186       template <class Glob, class S1, class S2, class S3>
00187       class RTIPreTransformReductionAdapter3 {
00188         public:
00189           typedef typename Glob::TOP                    TOP;
00190           typedef typename Glob::GenOP                GenOP;
00191           typedef typename Glob::RedOP                RedOP;
00192           typedef typename Glob::IdOP                  IdOP;
00193           typedef typename RedOP::result_type ReductionType;
00194         protected:
00195           TOP        _top;
00196           GenOP      _genop;
00197           RedOP      _redop;
00198           S1       * _vec_inout;
00199           const S2 * _vec_in2;
00200           const S3 * _vec_in3;
00201         public:
00202           RTIPreTransformReductionAdapter3(Glob glob)                              : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {}
00203           inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3; }
00204           static inline ReductionType identity()                                   { return IdOP::identity();                    }
00205           inline ReductionType reduce(ReductionType a, ReductionType b)            { return _redop(a, b);                        }
00206           inline ReductionType generate(int i)
00207           {
00208             _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i], _vec_in3[i] );
00209             return _genop( _vec_inout[i], _vec_in2[i], _vec_in3[i] ); 
00210           }
00211       };
00212 
00214       template <class OP>
00215       class TeuchosValueTypeReductionOpAdapter : public Teuchos::ValueTypeReductionOp<int,typename OP::ReductionType> 
00216       {
00217         protected:
00218           mutable OP _op;
00219         public:
00220           typedef typename OP::ReductionType Packet;
00221           TeuchosValueTypeReductionOpAdapter(OP op) : _op(op) {}
00222           void reduce(const int count, const Packet inBuffer[], Packet inoutBuffer []) const 
00223           {
00224             for (int i=0; i != count; ++i) {
00225               inoutBuffer[i] = _op.reduce( inoutBuffer[i], inBuffer[i] );
00226             }
00227           }
00228       };
00229 
00231       template <class S, class LO, class GO, class Node, class OP>
00232       void unary_transform(Vector<S,LO,GO,Node> &vec, OP op) 
00233       {
00234         Kokkos::MultiVector<S,Node> &mv = vec.getLocalMVNonConst();
00235         const RCP<Node> node = mv.getNode();
00236         // ready data
00237         Kokkos::ReadyBufferHelper<Node> rbh(node);
00238         rbh.begin();
00239         S * out_ptr = rbh.addNonConstBuffer(mv.getValuesNonConst());
00240         rbh.end();
00241         op.setData(out_ptr);
00242         const size_t N = mv.getNumRows();
00243         node->template parallel_for(0, N, op);
00244       }
00245 
00247       template <class S1, class S2, class LO, class GO, class Node, class OP>
00248       void binary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00249       {
00250         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00251         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00252         const RCP<Node> node = mv_inout.getNode();
00253         // ready data
00254         Kokkos::ReadyBufferHelper<Node> rbh(node);
00255         rbh.begin();
00256         S1       * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00257         const S2 * in_ptr  = rbh.addConstBuffer(mv_in2.getValues());
00258         rbh.end();
00259         op.setData(out_ptr, in_ptr);
00260         const size_t N = mv_inout.getNumRows();
00261 #ifdef HAVE_TPETRA_DEBUG
00262         TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode(), std::runtime_error, 
00263             "Tpetra::RTI::detail::binary_transform(): multivectors must share the same node.");
00264 #endif
00265         node->template parallel_for(0, N, op);
00266       }
00267 
00269       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00270       void tertiary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00271       {
00272         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00273         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00274         const Kokkos::MultiVector<S3,Node> &mv_in3   = vec_in3.getLocalMV();
00275         const RCP<Node> node = mv_inout.getNode();
00276         // ready data
00277         Kokkos::ReadyBufferHelper<Node> rbh(node);
00278         rbh.begin();
00279         S1       * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00280         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00281         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00282         rbh.end();
00283         op.setData(out_ptr, in_ptr2, in_ptr3);
00284         const size_t N = mv_inout.getNumRows();
00285 #ifdef HAVE_TPETRA_DEBUG
00286         TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode() || mv_in3.getNode() != mv_in2.getNode(), std::runtime_error, 
00287             "Tpetra::RTI::detail::tertiary_transform(): multivectors must share the same node.");
00288 #endif
00289         node->template parallel_for(0, N, op);
00290       }
00291 
00293       template <class S1, class S2, class LO, class GO, class Node, class OP>
00294       typename OP::ReductionType 
00295       reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00296       {
00297         const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV(),
00298                                            &mv_in2 = vec_in2.getLocalMV();
00299         const RCP<Node> node = mv_in1.getNode();
00300         const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm();
00301         // ready data
00302         Kokkos::ReadyBufferHelper<Node> rbh(node);
00303         rbh.begin();
00304         const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
00305         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00306         rbh.end();
00307         op.setData( in_ptr1, in_ptr2 );
00308         const size_t N = mv_in1.getNumRows();
00309 #ifdef HAVE_TPETRA_DEBUG
00310         TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode(), std::runtime_error, 
00311             "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
00312 #endif
00313         // compute local reduction
00314         typename OP::ReductionType gbl_res, lcl_res;
00315         lcl_res = node->template parallel_reduce(0, N, op);
00316         // compute global reduction
00317         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00318         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00319         return gbl_res;
00320       }
00321 
00323       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00324       typename OP::ReductionType 
00325       reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00326       {
00327         const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV();
00328         const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV();
00329         const Kokkos::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV();
00330         const RCP<Node> node = mv_in1.getNode();
00331         const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm();
00332         // ready data
00333         Kokkos::ReadyBufferHelper<Node> rbh(node);
00334         rbh.begin();
00335         const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues());
00336         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00337         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00338         rbh.end();
00339         op.setData( in_ptr1, in_ptr2, in_ptr3 );
00340         const size_t N = mv_in1.getNumRows();
00341 #ifdef HAVE_TPETRA_DEBUG
00342         TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode() || mv_in2.getNode() != mv_in3.getNode(), std::runtime_error, 
00343             "Tpetra::RTI::detail::reduce(): multivectors must share the same node.");
00344 #endif
00345         // compute local reduction
00346         typename OP::ReductionType gbl_res, lcl_res;
00347         lcl_res = node->template parallel_reduce(0, N, op);
00348         // compute global reduction
00349         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00350         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00351         return gbl_res;
00352       }
00353 
00355       template <class S1, class S2, class LO, class GO, class Node, class OP>
00356       typename OP::ReductionType 
00357       transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 
00358       {
00359         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00360         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00361         const RCP<Node> node = mv_inout.getNode();
00362         const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm();
00363         // ready data
00364         Kokkos::ReadyBufferHelper<Node> rbh(node);
00365         rbh.begin();
00366         S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00367         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00368         rbh.end();
00369         op.setData( in_ptr1, in_ptr2 );
00370         const size_t N = mv_inout.getNumRows();
00371 #ifdef HAVE_TPETRA_DEBUG
00372         TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode(), std::runtime_error, 
00373             "Tpetra::RTI::detail::transform_reduce(): multivectors must share the same node.");
00374 #endif
00375         // compute local reduction
00376         typename OP::ReductionType gbl_res, lcl_res;
00377         lcl_res = node->template parallel_reduce(0, N, op);
00378         // compute global reduction
00379         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00380         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00381         return gbl_res;
00382       }
00383 
00385       template <class S1, class S2, class S3, class LO, class GO, class Node, class OP>
00386       typename OP::ReductionType 
00387       transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 
00388       {
00389         Kokkos::MultiVector<S1,Node>       &mv_inout = vec_inout.getLocalMVNonConst();
00390         const Kokkos::MultiVector<S2,Node> &mv_in2   = vec_in2.getLocalMV();
00391         const Kokkos::MultiVector<S3,Node> &mv_in3   = vec_in3.getLocalMV();
00392         const RCP<Node> node = mv_inout.getNode();
00393         const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm();
00394         // ready data
00395         Kokkos::ReadyBufferHelper<Node> rbh(node);
00396         rbh.begin();
00397         S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst());
00398         const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues());
00399         const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues());
00400         rbh.end();
00401         op.setData( in_ptr1, in_ptr2, in_ptr3 );
00402         const size_t N = mv_inout.getNumRows();
00403 #ifdef HAVE_TPETRA_DEBUG
00404         TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode() && mv_inout.getNode() != mv_in3.getNode(), std::runtime_error, 
00405             "Tpetra::RTI::detail::transform_transform(): multivectors must share the same node.");
00406 #endif
00407         // compute local reduction
00408         typename OP::ReductionType gbl_res, lcl_res;
00409         lcl_res = node->template parallel_reduce(0, N, op);
00410         // compute global reduction
00411         TeuchosValueTypeReductionOpAdapter<OP> vtrop(op);
00412         Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res);
00413         return gbl_res;
00414       }
00415 
00416     } // end of namespace Tpetra::RTI::detail
00417 
00418   } // end of namespace Tpetra::RTI
00419 
00420 } // end of namespace Tpetra
00421 
00422 #endif // TPETRA_RTI_detail_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines