00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef THYRA_MPI_MULTI_VECTOR_BASE_HPP
00030 #define THYRA_MPI_MULTI_VECTOR_BASE_HPP
00031
00032 #include "Thyra_MPIMultiVectorBaseDecl.hpp"
00033 #include "Thyra_MultiVectorDefaultBase.hpp"
00034 #include "Thyra_EuclideanLinearOpBase.hpp"
00035 #include "Thyra_MPIVectorSpaceBase.hpp"
00036 #include "Thyra_ExplicitMultiVectorView.hpp"
00037 #include "RTOpPack_MPI_apply_op.hpp"
00038 #include "RTOp_parallel_helpers.h"
00039 #include "Teuchos_Workspace.hpp"
00040 #include "Teuchos_dyn_cast.hpp"
00041 #include "Teuchos_Time.hpp"
00042 #ifdef RTOp_USE_MPI
00043 # include "Teuchos_RawMPITraits.hpp"
00044 #endif
00045
00046
00047
00048
00049 namespace Thyra {
00050
00051 template<class Scalar>
00052 MPIMultiVectorBase<Scalar>::MPIMultiVectorBase()
00053 :in_applyOp_(false)
00054 ,globalDim_(0)
00055 ,localOffset_(-1)
00056 ,localSubDim_(0)
00057 ,numCols_(0)
00058 {}
00059
00060
00061
00062 template<class Scalar>
00063 Teuchos::RefCountPtr< const ScalarProdVectorSpaceBase<Scalar> >
00064 MPIMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00065 {
00066 return mpiSpace();
00067 }
00068
00069
00070
00071 template<class Scalar>
00072 void MPIMultiVectorBase<Scalar>::apply(
00073 const ETransp M_trans
00074 ,const MultiVectorBase<Scalar> &X
00075 ,MultiVectorBase<Scalar> *Y
00076 ,const Scalar alpha
00077 ,const Scalar beta
00078 ) const
00079 {
00080 this->single_scalar_euclidean_apply_impl(M_trans,X,Y,alpha,beta);
00081 }
00082
00083
00084
00085 template<class Scalar>
00086 void MPIMultiVectorBase<Scalar>::applyOp(
00087 const RTOpPack::RTOpT<Scalar> &pri_op
00088 ,const int num_multi_vecs
00089 ,const MultiVectorBase<Scalar>* multi_vecs[]
00090 ,const int num_targ_multi_vecs
00091 ,MultiVectorBase<Scalar>* targ_multi_vecs[]
00092 ,RTOpPack::ReductTarget* reduct_objs[]
00093 ,const Index pri_first_ele_in
00094 ,const Index pri_sub_dim_in
00095 ,const Index pri_global_offset_in
00096 ,const Index sec_first_ele_in
00097 ,const Index sec_sub_dim_in
00098 ) const
00099 {
00100 using Teuchos::dyn_cast;
00101 using Teuchos::Workspace;
00102 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00103 const Index numCols = this->domain()->dim();
00104 const MPIVectorSpaceBase<Scalar> &mpiSpc = *mpiSpace();
00105 #ifdef _DEBUG
00106 TEST_FOR_EXCEPTION(
00107 in_applyOp_, std::invalid_argument
00108 ,"MPIMultiVectorBase<>::applyOp(...): Error, this method is being entered recursively which is a "
00109 "clear sign that one of the methods getSubMultiVector(...), freeSubMultiVector(...) or commitSubMultiVector(...) "
00110 "was not implemented properly!"
00111 );
00112 apply_op_validate_input(
00113 "MPIMultiVectorBase<>::applyOp(...)", *this->domain(), *this->range()
00114 ,pri_op,num_multi_vecs,multi_vecs,num_targ_multi_vecs,targ_multi_vecs
00115 ,reduct_objs,pri_first_ele_in,pri_sub_dim_in,pri_global_offset_in
00116 ,sec_first_ele_in,sec_sub_dim_in
00117 );
00118 #endif
00119
00120 in_applyOp_ = true;
00121
00122
00123 const bool locallyReplicated = (localSubDim_ == globalDim_);
00124
00125
00126 RTOp_index_type overlap_first_local_ele = 0;
00127 RTOp_index_type overalap_local_sub_dim = 0;
00128 RTOp_index_type overlap_global_offset = 0;
00129 RTOp_parallel_calc_overlap(
00130 globalDim_, localSubDim_, localOffset_, pri_first_ele_in, pri_sub_dim_in, pri_global_offset_in
00131 ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset
00132 );
00133 const Range1D
00134 local_rng = (
00135 overlap_first_local_ele!=0
00136 ? Range1D( localOffset_ + overlap_first_local_ele, localOffset_ + overlap_first_local_ele + overalap_local_sub_dim - 1 )
00137 : Range1D::Invalid
00138 ),
00139 col_rng(
00140 sec_first_ele_in
00141 ,sec_sub_dim_in ? sec_first_ele_in + sec_sub_dim_in - 1 : numCols
00142 );
00143
00144 Workspace<RTOpPack::SubMultiVectorT<Scalar> > sub_multi_vecs(wss,num_multi_vecs);
00145 Workspace<RTOpPack::MutableSubMultiVectorT<Scalar> > targ_sub_multi_vecs(wss,num_targ_multi_vecs);
00146 if( overlap_first_local_ele != 0 ) {
00147 for(int k = 0; k < num_multi_vecs; ++k ) {
00148 multi_vecs[k]->getSubMultiVector( local_rng, col_rng, &sub_multi_vecs[k] );
00149 sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00150 }
00151 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00152 targ_multi_vecs[k]->getSubMultiVector( local_rng, col_rng, &targ_sub_multi_vecs[k] );
00153 targ_sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00154 }
00155 }
00156
00157 RTOpPack::MPI_apply_op(
00158 locallyReplicated ? MPI_COMM_NULL : mpiSpc.mpiComm()
00159 ,pri_op
00160 ,-1
00161 ,col_rng.size()
00162 ,num_multi_vecs
00163 ,num_multi_vecs && overlap_first_local_ele ? &sub_multi_vecs[0] : NULL
00164 ,num_targ_multi_vecs
00165 ,num_targ_multi_vecs && overlap_first_local_ele ? &targ_sub_multi_vecs[0] : NULL
00166 ,reduct_objs
00167 );
00168
00169 if( overlap_first_local_ele != 0 ) {
00170 for(int k = 0; k < num_multi_vecs; ++k ) {
00171 sub_multi_vecs[k].setGlobalOffset(local_rng.lbound()-1);
00172 multi_vecs[k]->freeSubMultiVector( &sub_multi_vecs[k] );
00173 }
00174 for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00175 targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound()-1);
00176 targ_multi_vecs[k]->commitSubMultiVector( &targ_sub_multi_vecs[k] );
00177 }
00178 }
00179
00180 in_applyOp_ = false;
00181 }
00182
00183 template<class Scalar>
00184 void MPIMultiVectorBase<Scalar>::getSubMultiVector(
00185 const Range1D &rowRng_in
00186 ,const Range1D &colRng_in
00187 ,RTOpPack::SubMultiVectorT<Scalar> *sub_mv
00188 ) const
00189 {
00190 const Range1D rowRng = validateRowRange(rowRng_in);
00191 const Range1D colRng = validateColRange(colRng_in);
00192 if( rowRng.lbound() < localOffset_+1 || localOffset_+localSubDim_ < rowRng.ubound() ) {
00193
00194 MultiVectorBase<Scalar>::getSubMultiVector(rowRng_in,colRng_in,sub_mv);
00195 return;
00196 }
00197
00198 const Scalar *localValues = NULL;
00199 int leadingDim = 0;
00200 this->getLocalData(&localValues,&leadingDim);
00201 sub_mv->initialize(
00202 rowRng.lbound()-1
00203 ,rowRng.size()
00204 ,colRng.lbound()-1
00205 ,colRng.size()
00206 ,localValues
00207 +(rowRng.lbound()-localOffset_-1)
00208 +(colRng.lbound()-1)*leadingDim
00209 ,leadingDim
00210 );
00211 }
00212
00213 template<class Scalar>
00214 void MPIMultiVectorBase<Scalar>::freeSubMultiVector(
00215 RTOpPack::SubMultiVectorT<Scalar>* sub_mv
00216 ) const
00217 {
00218 if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00219
00220 MultiVectorBase<Scalar>::freeSubMultiVector(sub_mv);
00221 return;
00222 }
00223 freeLocalData( sub_mv->values() );
00224 sub_mv->set_uninitialized();
00225 }
00226
00227 template<class Scalar>
00228 void MPIMultiVectorBase<Scalar>::getSubMultiVector(
00229 const Range1D &rowRng_in
00230 ,const Range1D &colRng_in
00231 ,RTOpPack::MutableSubMultiVectorT<Scalar> *sub_mv
00232 )
00233 {
00234 const Range1D rowRng = validateRowRange(rowRng_in);
00235 const Range1D colRng = validateColRange(colRng_in);
00236 if( rowRng.lbound() < localOffset_+1 || localOffset_+localSubDim_ < rowRng.ubound() ) {
00237
00238 MultiVectorBase<Scalar>::getSubMultiVector(rowRng_in,colRng_in,sub_mv);
00239 return;
00240 }
00241
00242 Scalar *localValues = NULL;
00243 int leadingDim = 0;
00244 this->getLocalData(&localValues,&leadingDim);
00245 sub_mv->initialize(
00246 rowRng.lbound()-1
00247 ,rowRng.size()
00248 ,colRng.lbound()-1
00249 ,colRng.size()
00250 ,localValues
00251 +(rowRng.lbound()-localOffset_-1)
00252 +(colRng.lbound()-1)*leadingDim
00253 ,leadingDim
00254 );
00255 }
00256
00257 template<class Scalar>
00258 void MPIMultiVectorBase<Scalar>::commitSubMultiVector(
00259 RTOpPack::MutableSubMultiVectorT<Scalar>* sub_mv
00260 )
00261 {
00262 if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00263
00264 MultiVectorBase<Scalar>::commitSubMultiVector(sub_mv);
00265 return;
00266 }
00267 commitLocalData( sub_mv->values() );
00268 sub_mv->set_uninitialized();
00269 }
00270
00271
00272
00273 template<class Scalar>
00274 void MPIMultiVectorBase<Scalar>::euclideanApply(
00275 const ETransp M_trans
00276 ,const MultiVectorBase<Scalar> &X
00277 ,MultiVectorBase<Scalar> *Y
00278 ,const Scalar alpha
00279 ,const Scalar beta
00280 ) const
00281 {
00282 typedef Teuchos::ScalarTraits<Scalar> ST;
00283 typedef Teuchos::RawMPITraits<Scalar> RMT;
00284 using Teuchos::Workspace;
00285 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00286
00287 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00288 Teuchos::Time timerTotal("dummy",true);
00289 Teuchos::Time timer("dummy");
00290 #endif
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317 const MPIVectorSpaceBase<Scalar> &mpiSpc = *this->mpiSpace();
00318
00319
00320 MPI_Comm mpiComm = mpiSpc.mpiComm();
00321 #ifdef _DEBUG
00322 const VectorSpaceBase<Scalar>
00323 &Y_range = *Y->range(),
00324 &X_range = *X.range();
00325
00326 TEST_FOR_EXCEPTION(
00327 ( globalDim_ > localSubDim_ ) && mpiComm == MPI_COMM_NULL, std::logic_error
00328 ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00329 );
00330
00331
00332
00333 TEST_FOR_EXCEPTION(
00334 real_trans(M_trans)==NOTRANS && !mpiSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00335 ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00336 );
00337 TEST_FOR_EXCEPTION(
00338 real_trans(M_trans)==TRANS && !mpiSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00339 ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00340 );
00341 #endif
00342
00343
00344
00345
00346
00347 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00348 timer.start();
00349 #endif
00350
00351 ExplicitMutableMultiVectorView<Scalar>
00352 Y_local(
00353 *Y
00354 ,real_trans(M_trans)==NOTRANS ? Range1D(localOffset_+1,localOffset_+localSubDim_) : Range1D()
00355 ,Range1D()
00356 );
00357 ExplicitMultiVectorView<Scalar>
00358 M_local(
00359 *this
00360 ,Range1D(localOffset_+1,localOffset_+localSubDim_)
00361 ,Range1D()
00362 );
00363 ExplicitMultiVectorView<Scalar>
00364 X_local(
00365 X
00366 ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_+1,localOffset_+localSubDim_)
00367 ,Range1D()
00368 );
00369 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00370 timer.stop();
00371 std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00372 #endif
00373 #ifdef _DEBUG
00374 TEST_FOR_EXCEPTION(
00375 real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00376 , Exceptions::IncompatibleVectorSpaces
00377 ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00378 );
00379 TEST_FOR_EXCEPTION(
00380 real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00381 , Exceptions::IncompatibleVectorSpaces
00382 ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00383 );
00384 #endif
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00412 timer.start();
00413 #endif
00414
00415 Workspace<Scalar> Y_local_tmp_store(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00416 RTOpPack::MutableSubMultiVectorT<Scalar> Y_local_tmp;
00417 Scalar localBeta;
00418 if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00419
00420 Y_local_tmp.initialize(
00421 0, Y_local.subDim()
00422 ,0, Y_local.numSubCols()
00423 ,&Y_local_tmp_store[0], Y_local.subDim()
00424 );
00425 if( localOffset_ == 0 ) {
00426
00427 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00428 Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00429 std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00430 }
00431 localBeta = beta;
00432 }
00433 else {
00434
00435 localBeta = 0.0;
00436 }
00437 }
00438 else {
00439
00440 Y_local_tmp = Y_local.smv();
00441 localBeta = beta;
00442 }
00443
00444 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00445 timer.stop();
00446 std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00447 #endif
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00460 timer.start();
00461 #endif
00462 Teuchos::ETransp t_transp;
00463 if(ST::isComplex) {
00464 switch(M_trans) {
00465 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00466 case TRANS: t_transp = Teuchos::TRANS; break;
00467 case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
00468 default: TEST_FOR_EXCEPT(true);
00469 }
00470 }
00471 else {
00472 switch(real_trans(M_trans)) {
00473 case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00474 case TRANS: t_transp = Teuchos::TRANS; break;
00475 default: TEST_FOR_EXCEPT(true);
00476 }
00477 }
00478 blas_.GEMM(
00479 t_transp
00480 ,Teuchos::NO_TRANS
00481 ,Y_local.subDim()
00482 ,Y_local.numSubCols()
00483 ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim()
00484 ,alpha
00485 ,const_cast<Scalar*>(M_local.values())
00486 ,M_local.leadingDim()
00487 ,const_cast<Scalar*>(X_local.values())
00488 ,X_local.leadingDim()
00489 ,localBeta
00490 ,Y_local_tmp.values()
00491 ,Y_local_tmp.leadingDim()
00492 );
00493 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00494 timer.stop();
00495 std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for GEMM = " << timer.totalElapsedTime() << " seconds\n";
00496 #endif
00497
00498 #ifdef RTOp_USE_MPI
00499 if( mpiComm != MPI_COMM_NULL ) {
00500
00501
00502
00503
00504
00505 if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00506
00507 Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00508
00509 MPI_Allreduce(
00510 Y_local_tmp.values()
00511 ,&Y_local_final_buff[0]
00512 ,RMT::adjustCount(Y_local_final_buff.size())
00513 ,RMT::type()
00514 ,RMT::sumOp()
00515 ,mpiComm
00516 );
00517
00518 const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00519 for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00520 Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00521 for( int i = 0; i < Y_local.subDim(); ++i ) {
00522 (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00523 }
00524 }
00525 }
00526 }
00527 else {
00528 #endif // RTOp_USE_MPI
00529
00530
00531
00532
00533 #ifdef RTOp_USE_MPI
00534 }
00535 #endif
00536
00537 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00538 timer.stop();
00539 std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Total time = " << timerTotal.totalElapsedTime() << " seconds\n";
00540 #endif
00541
00542 }
00543
00544
00545
00546 template<class Scalar>
00547 bool MPIMultiVectorBase<Scalar>::opSupported(ETransp M_trans) const
00548 {
00549 typedef Teuchos::ScalarTraits<Scalar> ST;
00550 return ( ST::isComplex ? ( M_trans!=CONJ ) : true );
00551 }
00552
00553 template<class Scalar>
00554 void MPIMultiVectorBase<Scalar>::updateMpiSpace()
00555 {
00556 if(globalDim_ == 0) {
00557 const MPIVectorSpaceBase<Scalar> *mpiSpace = this->mpiSpace().get();
00558 if(mpiSpace) {
00559 globalDim_ = mpiSpace->dim();
00560 localOffset_ = mpiSpace->localOffset();
00561 localSubDim_ = mpiSpace->localSubDim();
00562 numCols_ = this->domain()->dim();
00563 }
00564 else {
00565 globalDim_ = 0;
00566 localOffset_ = -1;
00567 localSubDim_ = 0;
00568 numCols_ = 0;
00569 }
00570 }
00571 }
00572
00573 template<class Scalar>
00574 Range1D MPIMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00575 {
00576 const Range1D rowRng = RangePack::full_range(rowRng_in,1,globalDim_);
00577 #ifdef _DEBUG
00578 TEST_FOR_EXCEPTION(
00579 rowRng.lbound() < 1 || globalDim_ < rowRng.ubound(), std::invalid_argument
00580 ,"MPIMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00581 <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00582 "in the range [1,"<<globalDim_<<"]!"
00583 );
00584 #endif
00585 return rowRng;
00586 }
00587
00588 template<class Scalar>
00589 Range1D MPIMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00590 {
00591 const Range1D colRng = RangePack::full_range(colRng_in,1,numCols_);
00592 #ifdef _DEBUG
00593 TEST_FOR_EXCEPTION(
00594 colRng.lbound() < 1 || numCols_ < colRng.ubound(), std::invalid_argument
00595 ,"MPIMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00596 <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00597 "in the range [1,"<<numCols_<<"]!"
00598 );
00599 #endif
00600 return colRng;
00601 }
00602
00603 }
00604
00605 #endif // THYRA_MPI_MULTI_VECTOR_BASE_HPP