00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00030 #define THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00031
00032
00033 #include "Thyra_SpmdMultiVectorBaseDecl.hpp"
00034 #include "Thyra_MultiVectorDefaultBase.hpp"
00035 #include "Thyra_SingleScalarEuclideanLinearOpBase.hpp"
00036 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
00037 #include "Thyra_DetachedMultiVectorView.hpp"
00038 #include "RTOpPack_SPMD_apply_op.hpp"
00039 #include "RTOp_parallel_helpers.h"
00040 #include "Teuchos_Workspace.hpp"
00041 #include "Teuchos_dyn_cast.hpp"
00042 #include "Teuchos_Time.hpp"
00043 #include "Teuchos_CommHelpers.hpp"
00044
00045
00046
00047
00048
00049
00050 namespace Thyra {
00051
00052
00053 template<class Scalar>
00054 SpmdMultiVectorBase<Scalar>::SpmdMultiVectorBase()
00055 :in_applyOp_(false)
00056 ,globalDim_(0)
00057 ,localOffset_(-1)
00058 ,localSubDim_(0)
00059 ,numCols_(0)
00060 ,nonconstLocalValuesViewPtr_(0)
00061 ,localValuesViewPtr_(0)
00062 {}
00063
00064
00065
00066
00067
00068 template<class Scalar>
00069 Teuchos::RCP< const ScalarProdVectorSpaceBase<Scalar> >
00070 SpmdMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00071 {
00072 return Teuchos::rcp_dynamic_cast<const ScalarProdVectorSpaceBase<Scalar> >(
00073 spmdSpace(),true
00074 );
00075 }
00076
00077
00078
00079
00080
00081 template<class Scalar>
00082 void SpmdMultiVectorBase<Scalar>::apply(
00083 const ETransp M_trans
00084 ,const MultiVectorBase<Scalar> &X
00085 ,MultiVectorBase<Scalar> *Y
00086 ,const Scalar alpha
00087 ,const Scalar beta
00088 ) const
00089 {
00090 this->single_scalar_euclidean_apply_impl(M_trans,X,Y,alpha,beta);
00091 }
00092
00093
00094
00095
00096
00097 template<class Scalar>
00098 void SpmdMultiVectorBase<Scalar>::mvMultiReductApplyOpImpl(
00099 const RTOpPack::RTOpT<Scalar> &pri_op
00100 ,const int num_multi_vecs
00101 ,const MultiVectorBase<Scalar>*const multi_vecs[]
00102 ,const int num_targ_multi_vecs
00103 ,MultiVectorBase<Scalar>*const targ_multi_vecs[]
00104 ,RTOpPack::ReductTarget*const reduct_objs[]
00105 ,const Index pri_first_ele_offset_in
00106 ,const Index pri_sub_dim_in
00107 ,const Index pri_global_offset_in
00108 ,const Index sec_first_ele_offset_in
00109 ,const Index sec_sub_dim_in
00110 ) const
00111 {
00112 using Teuchos::dyn_cast;
00113 using Teuchos::Workspace;
00114 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00115 const Index numCols = this->domain()->dim();
00116 const SpmdVectorSpaceBase<Scalar> &spmdSpc = *spmdSpace();
00117 #ifdef TEUCHOS_DEBUG
00118 TEST_FOR_EXCEPTION(
00119 in_applyOp_, std::invalid_argument
00120 ,"SpmdMultiVectorBase<>::mvMultiReductApplyOpImpl(...): Error, this method is being entered recursively which is a "
00121 "clear sign that one of the methods acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...) "
00122 "was not implemented properly!"
00123 );
00124 apply_op_validate_input(
00125 "SpmdMultiVectorBase<>::mvMultiReductApplyOpImpl(...)", *this->domain(), *this->range()
00126 ,pri_op,num_multi_vecs,multi_vecs,num_targ_multi_vecs,targ_multi_vecs
00127 ,reduct_objs,pri_first_ele_offset_in,pri_sub_dim_in,pri_global_offset_in
00128 ,sec_first_ele_offset_in,sec_sub_dim_in
00129 );
00130 #endif
00131
00132 in_applyOp_ = true;
00133
00134
00135 const bool locallyReplicated = (localSubDim_ == globalDim_);
00136
00137
00138 Teuchos_Index overlap_first_local_ele_off = 0;
00139 Teuchos_Index overlap_local_sub_dim = 0;
00140 Teuchos_Index overlap_global_offset = 0;
00141 RTOp_parallel_calc_overlap(
00142 globalDim_, localSubDim_, localOffset_, pri_first_ele_offset_in, pri_sub_dim_in, pri_global_offset_in
00143 ,&overlap_first_local_ele_off, &overlap_local_sub_dim, &overlap_global_offset
00144 );
00145 const Range1D
00146 local_rng = (
00147 overlap_first_local_ele_off>=0
00148 ? Range1D( localOffset_+overlap_first_local_ele_off, localOffset_+overlap_first_local_ele_off+overlap_local_sub_dim-1 )
00149 : Range1D::Invalid
00150 ),
00151 col_rng(
00152 sec_first_ele_offset_in
00153 ,sec_sub_dim_in >= 0 ? sec_first_ele_offset_in+sec_sub_dim_in-1 : numCols-1
00154 );
00155
00156 Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> > sub_multi_vecs(wss,num_multi_vecs);
00157 Workspace<RTOpPack::SubMultiVectorView<Scalar> > targ_sub_multi_vecs(wss,num_targ_multi_vecs);
00158 if( overlap_first_local_ele_off >= 0 ) {
00159 for(int k = 0; k < num_multi_vecs; ++k ) {
00160 multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &sub_multi_vecs[k] );
00161 sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00162 }
00163 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00164 targ_multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &targ_sub_multi_vecs[k] );
00165 targ_sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00166 }
00167 }
00168
00169 RTOpPack::SPMD_apply_op(
00170 locallyReplicated ? NULL : spmdSpc.getComm().get()
00171 ,pri_op
00172 ,col_rng.size()
00173 ,num_multi_vecs
00174 ,num_multi_vecs && overlap_first_local_ele_off>=0 ? &sub_multi_vecs[0] : NULL
00175 ,num_targ_multi_vecs
00176 ,num_targ_multi_vecs && overlap_first_local_ele_off>=0 ? &targ_sub_multi_vecs[0] : NULL
00177 ,reduct_objs
00178 );
00179
00180 if( overlap_first_local_ele_off >= 0 ) {
00181 for(int k = 0; k < num_multi_vecs; ++k ) {
00182 sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00183 multi_vecs[k]->releaseDetachedView( &sub_multi_vecs[k] );
00184 }
00185 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00186 targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00187 targ_multi_vecs[k]->commitDetachedView( &targ_sub_multi_vecs[k] );
00188 }
00189 }
00190
00191 in_applyOp_ = false;
00192 }
00193
00194
00195 template<class Scalar>
00196 void SpmdMultiVectorBase<Scalar>::acquireDetachedMultiVectorViewImpl(
00197 const Range1D &rowRng_in,
00198 const Range1D &colRng_in,
00199 RTOpPack::ConstSubMultiVectorView<Scalar> *sub_mv
00200 ) const
00201 {
00202 const Range1D rowRng = validateRowRange(rowRng_in);
00203 const Range1D colRng = validateColRange(colRng_in);
00204 if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00205
00206 MultiVectorDefaultBase<Scalar>::acquireDetachedMultiVectorViewImpl(
00207 rowRng_in,colRng_in,sub_mv
00208 );
00209 return;
00210 }
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223 const Scalar *localValues = NULL;
00224 int leadingDim = 0;
00225 this->getLocalData(&localValues,&leadingDim);
00226 localValuesViewPtr_ = localValues;
00227 sub_mv->initialize(
00228 rowRng.lbound()
00229 ,rowRng.size()
00230 ,colRng.lbound()
00231 ,colRng.size()
00232 ,localValues
00233 +(rowRng.lbound()-localOffset_)
00234 +colRng.lbound()*leadingDim
00235 ,leadingDim
00236 );
00237 }
00238
00239
00240 template<class Scalar>
00241 void SpmdMultiVectorBase<Scalar>::releaseDetachedMultiVectorViewImpl(
00242 RTOpPack::ConstSubMultiVectorView<Scalar>* sub_mv
00243 ) const
00244 {
00245 if(
00246 sub_mv->globalOffset() < localOffset_
00247 ||
00248 localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
00249 )
00250 {
00251
00252 MultiVectorDefaultBase<Scalar>::releaseDetachedMultiVectorViewImpl(sub_mv);
00253 return;
00254 }
00255
00256
00257
00258
00259
00260
00261
00262
00263 sub_mv->set_uninitialized();
00264 }
00265
00266
00267 template<class Scalar>
00268 void SpmdMultiVectorBase<Scalar>::acquireNonconstDetachedMultiVectorViewImpl(
00269 const Range1D &rowRng_in,
00270 const Range1D &colRng_in,
00271 RTOpPack::SubMultiVectorView<Scalar> *sub_mv
00272 )
00273 {
00274 const Range1D rowRng = validateRowRange(rowRng_in);
00275 const Range1D colRng = validateColRange(colRng_in);
00276 if(
00277 rowRng.lbound() < localOffset_
00278 ||
00279 localOffset_+localSubDim_-1 < rowRng.ubound()
00280 )
00281 {
00282
00283 MultiVectorDefaultBase<Scalar>::acquireNonconstDetachedMultiVectorViewImpl(
00284 rowRng_in, colRng_in, sub_mv
00285 );
00286 return;
00287 }
00288
00289
00290
00291
00292
00293
00294
00295
00296 Scalar *localValues = NULL;
00297 int leadingDim = 0;
00298 this->getLocalData(&localValues,&leadingDim);
00299 nonconstLocalValuesViewPtr_ = localValues;
00300 sub_mv->initialize(
00301 rowRng.lbound()
00302 ,rowRng.size()
00303 ,colRng.lbound()
00304 ,colRng.size()
00305 ,localValues
00306 +(rowRng.lbound()-localOffset_)
00307 +colRng.lbound()*leadingDim
00308 ,leadingDim
00309 );
00310 }
00311
00312
00313 template<class Scalar>
00314 void SpmdMultiVectorBase<Scalar>::commitNonconstDetachedMultiVectorViewImpl(
00315 RTOpPack::SubMultiVectorView<Scalar>* sub_mv
00316 )
00317 {
00318 if(
00319 sub_mv->globalOffset() < localOffset_
00320 ||
00321 localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
00322 )
00323 {
00324
00325 MultiVectorDefaultBase<Scalar>::commitNonconstDetachedMultiVectorViewImpl(sub_mv);
00326 return;
00327 }
00328
00329
00330
00331
00332
00333
00334
00335
00336 sub_mv->set_uninitialized();
00337 }
00338
00339
00340
00341
00342
00343 template<class Scalar>
00344 void SpmdMultiVectorBase<Scalar>::euclideanApply(
00345 const ETransp M_trans
00346 ,const MultiVectorBase<Scalar> &X
00347 ,MultiVectorBase<Scalar> *Y
00348 ,const Scalar alpha
00349 ,const Scalar beta
00350 ) const
00351 {
00352 typedef Teuchos::ScalarTraits<Scalar> ST;
00353 using Teuchos::Workspace;
00354 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00355
00356 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00357 Teuchos::Time timerTotal("dummy",true);
00358 Teuchos::Time timer("dummy");
00359 #endif
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386 const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
00387
00388
00389 const Teuchos::RCP<const Teuchos::Comm<Index> >
00390 comm = spmdSpc.getComm();
00391 #ifdef TEUCHOS_DEBUG
00392 const VectorSpaceBase<Scalar>
00393 &Y_range = *Y->range(),
00394 &X_range = *X.range();
00395
00396 TEST_FOR_EXCEPTION(
00397 ( globalDim_ > localSubDim_ ) && comm.get()==NULL, std::logic_error
00398 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00399 );
00400
00401
00402
00403 TEST_FOR_EXCEPTION(
00404 real_trans(M_trans)==NOTRANS && !spmdSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00405 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00406 );
00407 TEST_FOR_EXCEPTION(
00408 real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00409 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00410 );
00411 #endif
00412
00413
00414
00415
00416
00417 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00418 timer.start();
00419 #endif
00420
00421 DetachedMultiVectorView<Scalar>
00422 Y_local(
00423 *Y
00424 ,real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D()
00425 ,Range1D()
00426 );
00427 ConstDetachedMultiVectorView<Scalar>
00428 M_local(
00429 *this
00430 ,Range1D(localOffset_,localOffset_+localSubDim_-1)
00431 ,Range1D()
00432 );
00433 ConstDetachedMultiVectorView<Scalar>
00434 X_local(
00435 X
00436 ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
00437 ,Range1D()
00438 );
00439 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00440 timer.stop();
00441 std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00442 #endif
00443 #ifdef TEUCHOS_DEBUG
00444 TEST_FOR_EXCEPTION(
00445 real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00446 , Exceptions::IncompatibleVectorSpaces
00447 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00448 );
00449 TEST_FOR_EXCEPTION(
00450 real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00451 , Exceptions::IncompatibleVectorSpaces
00452 ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00453 );
00454 #endif
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00482 timer.start();
00483 #endif
00484
00485 Workspace<Scalar> Y_local_tmp_store(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00486 RTOpPack::SubMultiVectorView<Scalar> Y_local_tmp;
00487 Scalar localBeta;
00488 if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00489
00490 Y_local_tmp.initialize(
00491 0, Y_local.subDim()
00492 ,0, Y_local.numSubCols()
00493 ,&Y_local_tmp_store[0], Y_local.subDim()
00494 );
00495 if( localOffset_ == 0 ) {
00496
00497 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00498 Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00499 std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00500 }
00501 localBeta = beta;
00502 }
00503 else {
00504
00505 localBeta = 0.0;
00506 }
00507 }
00508 else {
00509
00510 Y_local_tmp = Y_local.smv();
00511 localBeta = beta;
00512 }
00513
00514 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00515 timer.stop();
00516 std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00517 #endif
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00530 timer.start();
00531 #endif
00532 Teuchos::ETransp t_transp;
00533 if(ST::isComplex) {
00534 switch(M_trans) {
00535 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00536 case TRANS: t_transp = Teuchos::TRANS; break;
00537 case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
00538 default: TEST_FOR_EXCEPT(true);
00539 }
00540 }
00541 else {
00542 switch(real_trans(M_trans)) {
00543 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00544 case TRANS: t_transp = Teuchos::TRANS; break;
00545 default: TEST_FOR_EXCEPT(true);
00546 }
00547 }
00548 blas_.GEMM(
00549 t_transp
00550 ,Teuchos::NO_TRANS
00551 ,Y_local.subDim()
00552 ,Y_local.numSubCols()
00553 ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim()
00554 ,alpha
00555 ,const_cast<Scalar*>(M_local.values())
00556 ,M_local.leadingDim()
00557 ,const_cast<Scalar*>(X_local.values())
00558 ,X_local.leadingDim()
00559 ,localBeta
00560 ,Y_local_tmp.values()
00561 ,Y_local_tmp.leadingDim()
00562 );
00563 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00564 timer.stop();
00565 std::cout
00566 << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for GEMM = "
00567 << timer.totalElapsedTime() << " seconds\n";
00568 #endif
00569
00570 if( comm.get() ) {
00571
00572
00573
00574
00575
00576 if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00577
00578 Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00579
00580 Teuchos::reduceAll<Index,Scalar>(
00581 *comm,Teuchos::REDUCE_SUM,Y_local_final_buff.size(),Y_local_tmp.values()
00582 ,&Y_local_final_buff[0]
00583 );
00584
00585 const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00586 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00587 Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00588 for( int i = 0; i < Y_local.subDim(); ++i ) {
00589 (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00590 }
00591 }
00592 }
00593 }
00594 else {
00595
00596
00597
00598
00599 }
00600
00601 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00602 timer.stop();
00603 std::cout
00604 << "\nSpmdMultiVectorBase<Scalar>::apply(...): Total time = "
00605 << timerTotal.totalElapsedTime() << " seconds\n";
00606 #endif
00607
00608 }
00609
00610
00611
00612
00613
00614 template<class Scalar>
00615 bool SpmdMultiVectorBase<Scalar>::opSupported(ETransp M_trans) const
00616 {
00617 typedef Teuchos::ScalarTraits<Scalar> ST;
00618 return ( ST::isComplex ? ( M_trans!=CONJ ) : true );
00619 }
00620
00621 template<class Scalar>
00622 void SpmdMultiVectorBase<Scalar>::updateSpmdSpace()
00623 {
00624 if(globalDim_ == 0) {
00625 const SpmdVectorSpaceBase<Scalar> *spmdSpace = this->spmdSpace().get();
00626 if(spmdSpace) {
00627 globalDim_ = spmdSpace->dim();
00628 localOffset_ = spmdSpace->localOffset();
00629 localSubDim_ = spmdSpace->localSubDim();
00630 numCols_ = this->domain()->dim();
00631 }
00632 else {
00633 globalDim_ = 0;
00634 localOffset_ = -1;
00635 localSubDim_ = 0;
00636 numCols_ = 0;
00637 }
00638 }
00639 }
00640
00641
00642 template<class Scalar>
00643 Range1D SpmdMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00644 {
00645 const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
00646 #ifdef TEUCHOS_DEBUG
00647 TEST_FOR_EXCEPTION(
00648 !( 0 <= rowRng.lbound() && rowRng.ubound() < globalDim_ ), std::invalid_argument
00649 ,"SpmdMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00650 <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00651 "in the range [0,"<<(globalDim_-1)<<"]!"
00652 );
00653 #endif
00654 return rowRng;
00655 }
00656
00657
00658 template<class Scalar>
00659 Range1D SpmdMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00660 {
00661 const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
00662 #ifdef TEUCHOS_DEBUG
00663 TEST_FOR_EXCEPTION(
00664 !(0 <= colRng.lbound() && colRng.ubound() < numCols_), std::invalid_argument
00665 ,"SpmdMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00666 <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00667 "in the range [0,"<<(numCols_-1)<<"]!"
00668 );
00669 #endif
00670 return colRng;
00671 }
00672
00673
00674 }
00675
00676
00677 #endif // THYRA_SPMD_MULTI_VECTOR_BASE_HPP