|
Tpetra Matrix/Vector Services Version of the Day
|
00001 // @HEADER 00002 // *********************************************************************** 00003 // 00004 // Tpetra: Templated Linear Algebra Services Package 00005 // Copyright (2008) Sandia Corporation 00006 // 00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, 00008 // the U.S. Government retains certain rights in this software. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // ************************************************************************ 00040 // @HEADER 00041 00042 #ifndef TPETRA_RTI_detail_HPP 00043 #define TPETRA_RTI_detail_HPP 00044 00045 #include <Teuchos_Assert.hpp> 00046 #include <Teuchos_CommHelpers.hpp> 00047 #include <Kokkos_NodeHelpers.hpp> 00048 00049 #include "Tpetra_Vector.hpp" 00050 00051 namespace Tpetra { 00052 00053 namespace RTI { 00054 00056 namespace detail { 00057 00059 template <class S> 00060 class StdOpKernel 00061 { 00062 protected: 00063 S _alpha, _beta; 00064 S * _vec_inout; 00065 const S * _vec_in2; 00066 public: 00067 inline StdOpKernel() : _alpha(ScalarTraits<S>::one()), _beta(ScalarTraits<S>::zero()) {} 00068 inline void setData(S * vec_inout, const S * vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2; } 00069 inline void setAlphaBeta(const S &alpha, const S &beta) { _alpha = alpha; _beta = beta; } 00070 }; 00071 00073 template <class OP, class S> 00074 class UnaryFunctorAdapter { 00075 protected: 00076 OP _op; 00077 S * _vec; 00078 public: 00079 UnaryFunctorAdapter(OP op) : _op(op) {} 00080 inline void setData(S *vec) { _vec = vec; } 00081 inline void execute(int i) { _vec[i] = _op(_vec[i]); } 00082 }; 00083 00085 template <class OP, class S1, class S2> 00086 class BinaryFunctorAdapter { 00087 protected: 00088 OP _op; 00089 S1 * _vec_inout; 00090 const S2 * _vec_in2; 00091 public: 00092 BinaryFunctorAdapter(OP op) : _op(op) {} 00093 inline void setData(S1 *vec_inout, const S2 *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2; } 00094 inline void execute(int i) { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i]); } 00095 }; 00096 00098 template <class OP, class S> 00099 class BinaryFunctorAdapterWithAlphaBeta : public StdOpKernel<S> { 00100 protected: 00101 OP _op; 00102 S * _vec_inout; 00103 const S * _vec_in2; 00104 public: 00105 BinaryFunctorAdapterWithAlphaBeta(OP op) : _op(op) {} 00106 inline void setData(S *vec_inout, const S *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2; } 00107 inline void execute(int i) 00108 { 00109 S res = _op(_vec_inout[i], _vec_in2[i]); 00110 _vec_inout[i] = this->_alpha * res + this->_beta * _vec_inout[i]; 00111 } 00112 }; 00113 00115 template <class OP, class S1, class S2, class S3> 00116 class TertiaryFunctorAdapter { 00117 protected: 00118 OP _op; 00119 S1 * _vec_inout; 00120 const S2 * _vec_in2; 00121 const S3 * _vec_in3; 00122 public: 00123 TertiaryFunctorAdapter(OP op) : _op(op) {} 00124 inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3; } 00125 inline void execute(int i) { _vec_inout[i] = _op(_vec_inout[i], _vec_in2[i], _vec_in3[i]); } 00126 }; 00127 00129 template <class Glob, class S1, class S2> 00130 class RTIReductionAdapter { 00131 public: 00132 typedef typename Glob::GenOP GenOP; 00133 typedef typename Glob::RedOP RedOP; 00134 typedef typename Glob::IdOP IdOP; 00135 typedef typename RedOP::result_type ReductionType; 00136 protected: 00137 GenOP _genop; 00138 RedOP _redop; 00139 const S1 * _vec_in1; 00140 const S2 * _vec_in2; 00141 public: 00142 RTIReductionAdapter(Glob glob) : _genop(glob.genop), _redop(glob.redop) {} 00143 inline void setData(const S1 *vec_in1, const S2 *vec_in2) { _vec_in1 = vec_in1; _vec_in2 = vec_in2; } 00144 static inline ReductionType identity() { return IdOP::identity(); } 00145 inline ReductionType generate(int i) { return _genop(_vec_in1[i], _vec_in2[i]); } 00146 inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b); } 00147 }; 00148 00150 template <class Glob, class S1, class S2, class S3> 00151 class RTIReductionAdapter3 { 00152 public: 00153 typedef typename Glob::GenOP GenOP; 00154 typedef typename Glob::RedOP RedOP; 00155 typedef typename Glob::IdOP IdOP; 00156 typedef typename RedOP::result_type ReductionType; 00157 protected: 00158 GenOP _genop; 00159 RedOP _redop; 00160 const S1 * _vec_in1; 00161 const S2 * _vec_in2; 00162 const S3 * _vec_in3; 00163 public: 00164 RTIReductionAdapter3(Glob glob) : _genop(glob.genop), _redop(glob.redop) {} 00165 inline void setData(const S1 *vec_in1, const S2 *vec_in2, const S3 *vec_in3) { _vec_in1 = vec_in1; _vec_in2 = vec_in2; _vec_in3 = vec_in3; } 00166 static inline ReductionType identity() { return IdOP::identity(); } 00167 inline ReductionType generate(int i) { return _genop(_vec_in1[i], _vec_in2[i], _vec_in3[i]); } 00168 inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b); } 00169 }; 00170 00172 template <class Glob, class S1, class S2> 00173 class RTIPreTransformReductionAdapter { 00174 public: 00175 typedef typename Glob::TOP TOP; 00176 typedef typename Glob::GenOP GenOP; 00177 typedef typename Glob::RedOP RedOP; 00178 typedef typename Glob::IdOP IdOP; 00179 typedef typename RedOP::result_type ReductionType; 00180 protected: 00181 TOP _top; 00182 GenOP _genop; 00183 RedOP _redop; 00184 S1 * _vec_inout; 00185 const S2 * _vec_in2; 00186 public: 00187 RTIPreTransformReductionAdapter(Glob glob) : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {} 00188 inline void setData(S1 *vec_inout, const S2 *vec_in2) { _vec_inout = vec_inout; _vec_in2 = vec_in2; } 00189 static inline ReductionType identity() { return IdOP::identity(); } 00190 inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b); } 00191 inline ReductionType generate(int i) 00192 { 00193 _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i] ); 00194 return _genop( _vec_inout[i], _vec_in2[i] ); 00195 } 00196 }; 00197 00199 template <class Glob, class S1, class S2, class S3> 00200 class RTIPreTransformReductionAdapter3 { 00201 public: 00202 typedef typename Glob::TOP TOP; 00203 typedef typename Glob::GenOP GenOP; 00204 typedef typename Glob::RedOP RedOP; 00205 typedef typename Glob::IdOP IdOP; 00206 typedef typename RedOP::result_type ReductionType; 00207 protected: 00208 TOP _top; 00209 GenOP _genop; 00210 RedOP _redop; 00211 S1 * _vec_inout; 00212 const S2 * _vec_in2; 00213 const S3 * _vec_in3; 00214 public: 00215 RTIPreTransformReductionAdapter3(Glob glob) : _top(glob.top), _genop(glob.genop), _redop(glob.redop) {} 00216 inline void setData(S1 *vec_inout, const S2 *vec_in2, const S3 *vec_in3) { _vec_inout = vec_inout; _vec_in2 = vec_in2; _vec_in3 = vec_in3; } 00217 static inline ReductionType identity() { return IdOP::identity(); } 00218 inline ReductionType reduce(ReductionType a, ReductionType b) { return _redop(a, b); } 00219 inline ReductionType generate(int i) 00220 { 00221 _vec_inout[i] = _top( _vec_inout[i], _vec_in2[i], _vec_in3[i] ); 00222 return _genop( _vec_inout[i], _vec_in2[i], _vec_in3[i] ); 00223 } 00224 }; 00225 00227 template <class OP> 00228 class TeuchosValueTypeReductionOpAdapter : public Teuchos::ValueTypeReductionOp<int,typename OP::ReductionType> 00229 { 00230 protected: 00231 mutable OP _op; 00232 public: 00233 typedef typename OP::ReductionType Packet; 00234 TeuchosValueTypeReductionOpAdapter(OP op) : _op(op) {} 00235 void reduce(const int count, const Packet inBuffer[], Packet inoutBuffer []) const 00236 { 00237 for (int i=0; i != count; ++i) { 00238 inoutBuffer[i] = _op.reduce( inoutBuffer[i], inBuffer[i] ); 00239 } 00240 } 00241 }; 00242 00244 template <class S, class LO, class GO, class Node, class OP> 00245 void unary_transform(Vector<S,LO,GO,Node> &vec, OP op) 00246 { 00247 Kokkos::MultiVector<S,Node> &mv = vec.getLocalMVNonConst(); 00248 const RCP<Node> node = mv.getNode(); 00249 // ready data 00250 Kokkos::ReadyBufferHelper<Node> rbh(node); 00251 rbh.begin(); 00252 S * out_ptr = rbh.addNonConstBuffer(mv.getValuesNonConst()); 00253 rbh.end(); 00254 op.setData(out_ptr); 00255 const size_t N = mv.getNumRows(); 00256 node->template parallel_for(0, N, op); 00257 } 00258 00260 template <class S1, class S2, class LO, class GO, class Node, class OP> 00261 void binary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 00262 { 00263 Kokkos::MultiVector<S1,Node> &mv_inout = vec_inout.getLocalMVNonConst(); 00264 const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV(); 00265 const RCP<Node> node = mv_inout.getNode(); 00266 // ready data 00267 Kokkos::ReadyBufferHelper<Node> rbh(node); 00268 rbh.begin(); 00269 S1 * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst()); 00270 const S2 * in_ptr = rbh.addConstBuffer(mv_in2.getValues()); 00271 rbh.end(); 00272 op.setData(out_ptr, in_ptr); 00273 const size_t N = mv_inout.getNumRows(); 00274 #ifdef HAVE_TPETRA_DEBUG 00275 TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode(), std::runtime_error, 00276 "Tpetra::RTI::detail::binary_transform(): multivectors must share the same node."); 00277 #endif 00278 node->template parallel_for(0, N, op); 00279 } 00280 00282 template <class S1, class S2, class S3, class LO, class GO, class Node, class OP> 00283 void tertiary_transform(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 00284 { 00285 Kokkos::MultiVector<S1,Node> &mv_inout = vec_inout.getLocalMVNonConst(); 00286 const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV(); 00287 const Kokkos::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV(); 00288 const RCP<Node> node = mv_inout.getNode(); 00289 // ready data 00290 Kokkos::ReadyBufferHelper<Node> rbh(node); 00291 rbh.begin(); 00292 S1 * out_ptr = rbh.addNonConstBuffer(mv_inout.getValuesNonConst()); 00293 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues()); 00294 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues()); 00295 rbh.end(); 00296 op.setData(out_ptr, in_ptr2, in_ptr3); 00297 const size_t N = mv_inout.getNumRows(); 00298 #ifdef HAVE_TPETRA_DEBUG 00299 TEUCHOS_TEST_FOR_EXCEPTION( mv_in2.getNode() != mv_inout.getNode() || mv_in3.getNode() != mv_in2.getNode(), std::runtime_error, 00300 "Tpetra::RTI::detail::tertiary_transform(): multivectors must share the same node."); 00301 #endif 00302 node->template parallel_for(0, N, op); 00303 } 00304 00306 template <class S1, class S2, class LO, class GO, class Node, class OP> 00307 typename OP::ReductionType 00308 reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 00309 { 00310 const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV(), 00311 &mv_in2 = vec_in2.getLocalMV(); 00312 const RCP<Node> node = mv_in1.getNode(); 00313 const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm(); 00314 // ready data 00315 Kokkos::ReadyBufferHelper<Node> rbh(node); 00316 rbh.begin(); 00317 const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues()); 00318 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues()); 00319 rbh.end(); 00320 op.setData( in_ptr1, in_ptr2 ); 00321 const size_t N = mv_in1.getNumRows(); 00322 #ifdef HAVE_TPETRA_DEBUG 00323 TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode(), std::runtime_error, 00324 "Tpetra::RTI::detail::reduce(): multivectors must share the same node."); 00325 #endif 00326 // compute local reduction 00327 typename OP::ReductionType gbl_res, lcl_res; 00328 lcl_res = node->template parallel_reduce(0, N, op); 00329 // compute global reduction 00330 TeuchosValueTypeReductionOpAdapter<OP> vtrop(op); 00331 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res); 00332 return gbl_res; 00333 } 00334 00336 template <class S1, class S2, class S3, class LO, class GO, class Node, class OP> 00337 typename OP::ReductionType 00338 reduce(const Vector<S1,LO,GO,Node> &vec_in1, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 00339 { 00340 const Kokkos::MultiVector<S1,Node> &mv_in1 = vec_in1.getLocalMV(); 00341 const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV(); 00342 const Kokkos::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV(); 00343 const RCP<Node> node = mv_in1.getNode(); 00344 const RCP<const Teuchos::Comm<int> > comm = vec_in1.getMap()->getComm(); 00345 // ready data 00346 Kokkos::ReadyBufferHelper<Node> rbh(node); 00347 rbh.begin(); 00348 const S1 * in_ptr1 = rbh.addConstBuffer(mv_in1.getValues()); 00349 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues()); 00350 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues()); 00351 rbh.end(); 00352 op.setData( in_ptr1, in_ptr2, in_ptr3 ); 00353 const size_t N = mv_in1.getNumRows(); 00354 #ifdef HAVE_TPETRA_DEBUG 00355 TEUCHOS_TEST_FOR_EXCEPTION( mv_in1.getNode() != mv_in2.getNode() || mv_in2.getNode() != mv_in3.getNode(), std::runtime_error, 00356 "Tpetra::RTI::detail::reduce(): multivectors must share the same node."); 00357 #endif 00358 // compute local reduction 00359 typename OP::ReductionType gbl_res, lcl_res; 00360 lcl_res = node->template parallel_reduce(0, N, op); 00361 // compute global reduction 00362 TeuchosValueTypeReductionOpAdapter<OP> vtrop(op); 00363 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res); 00364 return gbl_res; 00365 } 00366 00368 template <class S1, class S2, class LO, class GO, class Node, class OP> 00369 typename OP::ReductionType 00370 transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, OP op) 00371 { 00372 Kokkos::MultiVector<S1,Node> &mv_inout = vec_inout.getLocalMVNonConst(); 00373 const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV(); 00374 const RCP<Node> node = mv_inout.getNode(); 00375 const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm(); 00376 // ready data 00377 Kokkos::ReadyBufferHelper<Node> rbh(node); 00378 rbh.begin(); 00379 S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst()); 00380 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues()); 00381 rbh.end(); 00382 op.setData( in_ptr1, in_ptr2 ); 00383 const size_t N = mv_inout.getNumRows(); 00384 #ifdef HAVE_TPETRA_DEBUG 00385 TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode(), std::runtime_error, 00386 "Tpetra::RTI::detail::transform_reduce(): multivectors must share the same node."); 00387 #endif 00388 // compute local reduction 00389 typename OP::ReductionType gbl_res, lcl_res; 00390 lcl_res = node->template parallel_reduce(0, N, op); 00391 // compute global reduction 00392 TeuchosValueTypeReductionOpAdapter<OP> vtrop(op); 00393 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res); 00394 return gbl_res; 00395 } 00396 00398 template <class S1, class S2, class S3, class LO, class GO, class Node, class OP> 00399 typename OP::ReductionType 00400 transform_reduce(Vector<S1,LO,GO,Node> &vec_inout, const Vector<S2,LO,GO,Node> &vec_in2, const Vector<S3,LO,GO,Node> &vec_in3, OP op) 00401 { 00402 Kokkos::MultiVector<S1,Node> &mv_inout = vec_inout.getLocalMVNonConst(); 00403 const Kokkos::MultiVector<S2,Node> &mv_in2 = vec_in2.getLocalMV(); 00404 const Kokkos::MultiVector<S3,Node> &mv_in3 = vec_in3.getLocalMV(); 00405 const RCP<Node> node = mv_inout.getNode(); 00406 const RCP<const Teuchos::Comm<int> > comm = vec_inout.getMap()->getComm(); 00407 // ready data 00408 Kokkos::ReadyBufferHelper<Node> rbh(node); 00409 rbh.begin(); 00410 S1 * in_ptr1 = rbh.addNonConstBuffer(mv_inout.getValuesNonConst()); 00411 const S2 * in_ptr2 = rbh.addConstBuffer(mv_in2.getValues()); 00412 const S3 * in_ptr3 = rbh.addConstBuffer(mv_in3.getValues()); 00413 rbh.end(); 00414 op.setData( in_ptr1, in_ptr2, in_ptr3 ); 00415 const size_t N = mv_inout.getNumRows(); 00416 #ifdef HAVE_TPETRA_DEBUG 00417 TEUCHOS_TEST_FOR_EXCEPTION( mv_inout.getNode() != mv_in2.getNode() && mv_inout.getNode() != mv_in3.getNode(), std::runtime_error, 00418 "Tpetra::RTI::detail::transform_transform(): multivectors must share the same node."); 00419 #endif 00420 // compute local reduction 00421 typename OP::ReductionType gbl_res, lcl_res; 00422 lcl_res = node->template parallel_reduce(0, N, op); 00423 // compute global reduction 00424 TeuchosValueTypeReductionOpAdapter<OP> vtrop(op); 00425 Teuchos::reduceAll(*comm, vtrop, 1, &lcl_res, &gbl_res); 00426 return gbl_res; 00427 } 00428 00429 } // end of namespace Tpetra::RTI::detail 00430 00431 } // end of namespace Tpetra::RTI 00432 00433 } // end of namespace Tpetra 00434 00435 #endif // TPETRA_RTI_detail_HPP
1.7.4