00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00030 #define THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00031
00032 #include "Thyra_SpmdMultiVectorBaseDecl.hpp"
00033 #include "Thyra_MultiVectorDefaultBase.hpp"
00034 #include "Thyra_SingleScalarEuclideanLinearOpBase.hpp"
00035 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
00036 #include "Thyra_DetachedMultiVectorView.hpp"
00037 #include "RTOpPack_SPMD_apply_op.hpp"
00038 #include "RTOp_parallel_helpers.h"
00039 #include "Teuchos_Workspace.hpp"
00040 #include "Teuchos_dyn_cast.hpp"
00041 #include "Teuchos_Time.hpp"
00042 #include "Teuchos_CommHelpers.hpp"
00043
00044
00045
00046
00047 namespace Thyra {
00048
00049 template<class Scalar>
00050 SpmdMultiVectorBase<Scalar>::SpmdMultiVectorBase()
00051 :in_applyOp_(false)
00052 ,globalDim_(0)
00053 ,localOffset_(-1)
00054 ,localSubDim_(0)
00055 ,numCols_(0)
00056 {}
00057
00058
00059
00060 template<class Scalar>
00061 Teuchos::RefCountPtr< const ScalarProdVectorSpaceBase<Scalar> >
00062 SpmdMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00063 {
00064 return Teuchos::rcp_dynamic_cast<const ScalarProdVectorSpaceBase<Scalar> >(
00065 spmdSpace(),true
00066 );
00067 }
00068
00069
00070
00071 template<class Scalar>
00072 void SpmdMultiVectorBase<Scalar>::apply(
00073 const ETransp M_trans
00074 ,const MultiVectorBase<Scalar> &X
00075 ,MultiVectorBase<Scalar> *Y
00076 ,const Scalar alpha
00077 ,const Scalar beta
00078 ) const
00079 {
00080 this->single_scalar_euclidean_apply_impl(M_trans,X,Y,alpha,beta);
00081 }
00082
00083
00084
00085 template<class Scalar>
00086 void SpmdMultiVectorBase<Scalar>::applyOp(
00087 const RTOpPack::RTOpT<Scalar> &pri_op
00088 ,const int num_multi_vecs
00089 ,const MultiVectorBase<Scalar>*const multi_vecs[]
00090 ,const int num_targ_multi_vecs
00091 ,MultiVectorBase<Scalar>*const targ_multi_vecs[]
00092 ,RTOpPack::ReductTarget*const reduct_objs[]
00093 ,const Index pri_first_ele_offset_in
00094 ,const Index pri_sub_dim_in
00095 ,const Index pri_global_offset_in
00096 ,const Index sec_first_ele_offset_in
00097 ,const Index sec_sub_dim_in
00098 ) const
00099 {
00100 using Teuchos::dyn_cast;
00101 using Teuchos::Workspace;
00102 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00103 const Index numCols = this->domain()->dim();
00104 const SpmdVectorSpaceBase<Scalar> &spmdSpc = *spmdSpace();
00105 #ifdef TEUCHOS_DEBUG
00106 TEST_FOR_EXCEPTION(
00107 in_applyOp_, std::invalid_argument
00108 ,"SpmdMultiVectorBase<>::applyOp(...): Error, this method is being entered recursively which is a "
00109 "clear sign that one of the methods acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...) "
00110 "was not implemented properly!"
00111 );
00112 apply_op_validate_input(
00113 "SpmdMultiVectorBase<>::applyOp(...)", *this->domain(), *this->range()
00114 ,pri_op,num_multi_vecs,multi_vecs,num_targ_multi_vecs,targ_multi_vecs
00115 ,reduct_objs,pri_first_ele_offset_in,pri_sub_dim_in,pri_global_offset_in
00116 ,sec_first_ele_offset_in,sec_sub_dim_in
00117 );
00118 #endif
00119
00120 in_applyOp_ = true;
00121
00122
00123 const bool locallyReplicated = (localSubDim_ == globalDim_);
00124
00125
00126 Teuchos_Index overlap_first_local_ele_off = 0;
00127 Teuchos_Index overlap_local_sub_dim = 0;
00128 Teuchos_Index overlap_global_offset = 0;
00129 RTOp_parallel_calc_overlap(
00130 globalDim_, localSubDim_, localOffset_, pri_first_ele_offset_in, pri_sub_dim_in, pri_global_offset_in
00131 ,&overlap_first_local_ele_off, &overlap_local_sub_dim, &overlap_global_offset
00132 );
00133 const Range1D
00134 local_rng = (
00135 overlap_first_local_ele_off>=0
00136 ? Range1D( localOffset_+overlap_first_local_ele_off, localOffset_+overlap_first_local_ele_off+overlap_local_sub_dim-1 )
00137 : Range1D::Invalid
00138 ),
00139 col_rng(
00140 sec_first_ele_offset_in
00141 ,sec_sub_dim_in >= 0 ? sec_first_ele_offset_in+sec_sub_dim_in-1 : numCols-1
00142 );
00143
00144 Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> > sub_multi_vecs(wss,num_multi_vecs);
00145 Workspace<RTOpPack::SubMultiVectorView<Scalar> > targ_sub_multi_vecs(wss,num_targ_multi_vecs);
00146 if( overlap_first_local_ele_off >= 0 ) {
00147 for(int k = 0; k < num_multi_vecs; ++k ) {
00148 multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &sub_multi_vecs[k] );
00149 sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00150 }
00151 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00152 targ_multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &targ_sub_multi_vecs[k] );
00153 targ_sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00154 }
00155 }
00156
00157 RTOpPack::SPMD_apply_op(
00158 locallyReplicated ? NULL : spmdSpc.getComm().get()
00159 ,pri_op
00160 ,col_rng.size()
00161 ,num_multi_vecs
00162 ,num_multi_vecs && overlap_first_local_ele_off>=0 ? &sub_multi_vecs[0] : NULL
00163 ,num_targ_multi_vecs
00164 ,num_targ_multi_vecs && overlap_first_local_ele_off>=0 ? &targ_sub_multi_vecs[0] : NULL
00165 ,reduct_objs
00166 );
00167
00168 if( overlap_first_local_ele_off >= 0 ) {
00169 for(int k = 0; k < num_multi_vecs; ++k ) {
00170 sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00171 multi_vecs[k]->releaseDetachedView( &sub_multi_vecs[k] );
00172 }
00173 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00174 targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00175 targ_multi_vecs[k]->commitDetachedView( &targ_sub_multi_vecs[k] );
00176 }
00177 }
00178
00179 in_applyOp_ = false;
00180 }
00181
00182 template<class Scalar>
00183 void SpmdMultiVectorBase<Scalar>::acquireDetachedView(
00184 const Range1D &rowRng_in
00185 ,const Range1D &colRng_in
00186 ,RTOpPack::ConstSubMultiVectorView<Scalar> *sub_mv
00187 ) const
00188 {
00189 const Range1D rowRng = validateRowRange(rowRng_in);
00190 const Range1D colRng = validateColRange(colRng_in);
00191 if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00192
00193 MultiVectorDefaultBase<Scalar>::acquireDetachedView(rowRng_in,colRng_in,sub_mv);
00194 return;
00195 }
00196
00197 const Scalar *localValues = NULL;
00198 int leadingDim = 0;
00199 this->getLocalData(&localValues,&leadingDim);
00200 sub_mv->initialize(
00201 rowRng.lbound()
00202 ,rowRng.size()
00203 ,colRng.lbound()
00204 ,colRng.size()
00205 ,localValues
00206 +(rowRng.lbound()-localOffset_)
00207 +colRng.lbound()*leadingDim
00208 ,leadingDim
00209 );
00210 }
00211
00212 template<class Scalar>
00213 void SpmdMultiVectorBase<Scalar>::releaseDetachedView(
00214 RTOpPack::ConstSubMultiVectorView<Scalar>* sub_mv
00215 ) const
00216 {
00217 if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00218
00219 MultiVectorDefaultBase<Scalar>::releaseDetachedView(sub_mv);
00220 return;
00221 }
00222 freeLocalData( sub_mv->values() );
00223 sub_mv->set_uninitialized();
00224 }
00225
00226 template<class Scalar>
00227 void SpmdMultiVectorBase<Scalar>::acquireDetachedView(
00228 const Range1D &rowRng_in
00229 ,const Range1D &colRng_in
00230 ,RTOpPack::SubMultiVectorView<Scalar> *sub_mv
00231 )
00232 {
00233 const Range1D rowRng = validateRowRange(rowRng_in);
00234 const Range1D colRng = validateColRange(colRng_in);
00235 if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00236
00237 MultiVectorDefaultBase<Scalar>::acquireDetachedView(rowRng_in,colRng_in,sub_mv);
00238 return;
00239 }
00240
00241 Scalar *localValues = NULL;
00242 int leadingDim = 0;
00243 this->getLocalData(&localValues,&leadingDim);
00244 sub_mv->initialize(
00245 rowRng.lbound()
00246 ,rowRng.size()
00247 ,colRng.lbound()
00248 ,colRng.size()
00249 ,localValues
00250 +(rowRng.lbound()-localOffset_)
00251 +colRng.lbound()*leadingDim
00252 ,leadingDim
00253 );
00254 }
00255
00256 template<class Scalar>
00257 void SpmdMultiVectorBase<Scalar>::commitDetachedView(
00258 RTOpPack::SubMultiVectorView<Scalar>* sub_mv
00259 )
00260 {
00261 if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00262
00263 MultiVectorDefaultBase<Scalar>::commitDetachedView(sub_mv);
00264 return;
00265 }
00266 commitLocalData( sub_mv->values() );
00267 sub_mv->set_uninitialized();
00268 }
00269
00270
00271
00272 template<class Scalar>
00273 void SpmdMultiVectorBase<Scalar>::euclideanApply(
00274 const ETransp M_trans
00275 ,const MultiVectorBase<Scalar> &X
00276 ,MultiVectorBase<Scalar> *Y
00277 ,const Scalar alpha
00278 ,const Scalar beta
00279 ) const
00280 {
00281 typedef Teuchos::ScalarTraits<Scalar> ST;
00282 using Teuchos::Workspace;
00283 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00284
00285 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00286 Teuchos::Time timerTotal("dummy",true);
00287 Teuchos::Time timer("dummy");
00288 #endif
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315 const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
00316
00317
00318 const Teuchos::RefCountPtr<const Teuchos::Comm<Index> >
00319 comm = spmdSpc.getComm();
00320 #ifdef TEUCHOS_DEBUG
00321 const VectorSpaceBase<Scalar>
00322 &Y_range = *Y->range(),
00323 &X_range = *X.range();
00324
00325 TEST_FOR_EXCEPTION(
00326 ( globalDim_ > localSubDim_ ) && comm.get()==NULL, std::logic_error
00327 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00328 );
00329
00330
00331
00332 TEST_FOR_EXCEPTION(
00333 real_trans(M_trans)==NOTRANS && !spmdSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00334 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00335 );
00336 TEST_FOR_EXCEPTION(
00337 real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00338 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00339 );
00340 #endif
00341
00342
00343
00344
00345
00346 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00347 timer.start();
00348 #endif
00349
00350 DetachedMultiVectorView<Scalar>
00351 Y_local(
00352 *Y
00353 ,real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D()
00354 ,Range1D()
00355 );
00356 ConstDetachedMultiVectorView<Scalar>
00357 M_local(
00358 *this
00359 ,Range1D(localOffset_,localOffset_+localSubDim_-1)
00360 ,Range1D()
00361 );
00362 ConstDetachedMultiVectorView<Scalar>
00363 X_local(
00364 X
00365 ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
00366 ,Range1D()
00367 );
00368 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00369 timer.stop();
00370 std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00371 #endif
00372 #ifdef TEUCHOS_DEBUG
00373 TEST_FOR_EXCEPTION(
00374 real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00375 , Exceptions::IncompatibleVectorSpaces
00376 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00377 );
00378 TEST_FOR_EXCEPTION(
00379 real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00380 , Exceptions::IncompatibleVectorSpaces
00381 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00382 );
00383 #endif
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00411 timer.start();
00412 #endif
00413
00414 Workspace<Scalar> Y_local_tmp_store(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00415 RTOpPack::SubMultiVectorView<Scalar> Y_local_tmp;
00416 Scalar localBeta;
00417 if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00418
00419 Y_local_tmp.initialize(
00420 0, Y_local.subDim()
00421 ,0, Y_local.numSubCols()
00422 ,&Y_local_tmp_store[0], Y_local.subDim()
00423 );
00424 if( localOffset_ == 0 ) {
00425
00426 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00427 Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00428 std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00429 }
00430 localBeta = beta;
00431 }
00432 else {
00433
00434 localBeta = 0.0;
00435 }
00436 }
00437 else {
00438
00439 Y_local_tmp = Y_local.smv();
00440 localBeta = beta;
00441 }
00442
00443 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00444 timer.stop();
00445 std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00446 #endif
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00459 timer.start();
00460 #endif
00461 Teuchos::ETransp t_transp;
00462 if(ST::isComplex) {
00463 switch(M_trans) {
00464 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00465 case TRANS: t_transp = Teuchos::TRANS; break;
00466 case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
00467 default: TEST_FOR_EXCEPT(true);
00468 }
00469 }
00470 else {
00471 switch(real_trans(M_trans)) {
00472 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00473 case TRANS: t_transp = Teuchos::TRANS; break;
00474 default: TEST_FOR_EXCEPT(true);
00475 }
00476 }
00477 blas_.GEMM(
00478 t_transp
00479 ,Teuchos::NO_TRANS
00480 ,Y_local.subDim()
00481 ,Y_local.numSubCols()
00482 ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim()
00483 ,alpha
00484 ,const_cast<Scalar*>(M_local.values())
00485 ,M_local.leadingDim()
00486 ,const_cast<Scalar*>(X_local.values())
00487 ,X_local.leadingDim()
00488 ,localBeta
00489 ,Y_local_tmp.values()
00490 ,Y_local_tmp.leadingDim()
00491 );
00492 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00493 timer.stop();
00494 std::cout
00495 << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for GEMM = "
00496 << timer.totalElapsedTime() << " seconds\n";
00497 #endif
00498
00499 if( comm.get() ) {
00500
00501
00502
00503
00504
00505 if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00506
00507 Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00508
00509 Teuchos::reduceAll<Index,Scalar>(
00510 *comm,Teuchos::REDUCE_SUM,Y_local_final_buff.size(),Y_local_tmp.values()
00511 ,&Y_local_final_buff[0]
00512 );
00513
00514 const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00515 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00516 Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00517 for( int i = 0; i < Y_local.subDim(); ++i ) {
00518 (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00519 }
00520 }
00521 }
00522 }
00523 else {
00524
00525
00526
00527
00528 }
00529
00530 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00531 timer.stop();
00532 std::cout
00533 << "\nSpmdMultiVectorBase<Scalar>::apply(...): Total time = "
00534 << timerTotal.totalElapsedTime() << " seconds\n";
00535 #endif
00536
00537 }
00538
00539
00540
00541 template<class Scalar>
00542 bool SpmdMultiVectorBase<Scalar>::opSupported(ETransp M_trans) const
00543 {
00544 typedef Teuchos::ScalarTraits<Scalar> ST;
00545 return ( ST::isComplex ? ( M_trans!=CONJ ) : true );
00546 }
00547
00548 template<class Scalar>
00549 void SpmdMultiVectorBase<Scalar>::updateSpmdSpace()
00550 {
00551 if(globalDim_ == 0) {
00552 const SpmdVectorSpaceBase<Scalar> *spmdSpace = this->spmdSpace().get();
00553 if(spmdSpace) {
00554 globalDim_ = spmdSpace->dim();
00555 localOffset_ = spmdSpace->localOffset();
00556 localSubDim_ = spmdSpace->localSubDim();
00557 numCols_ = this->domain()->dim();
00558 }
00559 else {
00560 globalDim_ = 0;
00561 localOffset_ = -1;
00562 localSubDim_ = 0;
00563 numCols_ = 0;
00564 }
00565 }
00566 }
00567
00568 template<class Scalar>
00569 Range1D SpmdMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00570 {
00571 const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
00572 #ifdef TEUCHOS_DEBUG
00573 TEST_FOR_EXCEPTION(
00574 !( 0 <= rowRng.lbound() && rowRng.ubound() < globalDim_ ), std::invalid_argument
00575 ,"SpmdMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00576 <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00577 "in the range [0,"<<(globalDim_-1)<<"]!"
00578 );
00579 #endif
00580 return rowRng;
00581 }
00582
00583 template<class Scalar>
00584 Range1D SpmdMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00585 {
00586 const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
00587 #ifdef TEUCHOS_DEBUG
00588 TEST_FOR_EXCEPTION(
00589 !(0 <= colRng.lbound() && colRng.ubound() < numCols_), std::invalid_argument
00590 ,"SpmdMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00591 <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00592 "in the range [0,"<<(numCols_-1)<<"]!"
00593 );
00594 #endif
00595 return colRng;
00596 }
00597
00598 }
00599
00600 #endif // THYRA_SPMD_MULTI_VECTOR_BASE_HPP