Thyra_MPIMultiVectorBase.hpp

00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //    Thyra: Interfaces and Support for Abstract Numerical Algorithms
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef THYRA_MPI_MULTI_VECTOR_BASE_HPP
00030 #define THYRA_MPI_MULTI_VECTOR_BASE_HPP
00031 
00032 #include "Thyra_MPIMultiVectorBaseDecl.hpp"
00033 #include "Thyra_MultiVectorDefaultBase.hpp"
00034 #include "Thyra_EuclideanLinearOpBase.hpp"
00035 #include "Thyra_MPIVectorSpaceBase.hpp"
00036 #include "Thyra_ExplicitMultiVectorView.hpp"
00037 #include "RTOpPack_MPI_apply_op.hpp"
00038 #include "RTOp_parallel_helpers.h"
00039 #include "Teuchos_Workspace.hpp"
00040 #include "Teuchos_dyn_cast.hpp"
00041 #include "Teuchos_Time.hpp"
00042 #ifdef RTOp_USE_MPI
00043 #  include "Teuchos_RawMPITraits.hpp"
00044 #endif
00045 
00046 // Define to see some timing output!
00047 //#define THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00048 
00049 namespace Thyra {
00050 
00051 template<class Scalar>
00052 MPIMultiVectorBase<Scalar>::MPIMultiVectorBase()
00053   :in_applyOp_(false)
00054   ,globalDim_(0)
00055   ,localOffset_(-1)
00056   ,localSubDim_(0)
00057   ,numCols_(0)
00058 {}
00059 
00060 // Overridden from EuclideanLinearOpBase
00061 
00062 template<class Scalar>
00063 Teuchos::RefCountPtr< const ScalarProdVectorSpaceBase<Scalar> >
00064 MPIMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00065 {
00066   return mpiSpace();
00067 }
00068 
00069 // Overridden from LinearOpBase
00070 
00071 template<class Scalar>
00072 void MPIMultiVectorBase<Scalar>::apply(
00073   const ETransp                     M_trans
00074   ,const MultiVectorBase<Scalar>    &X
00075   ,MultiVectorBase<Scalar>          *Y
00076   ,const Scalar                     alpha
00077   ,const Scalar                     beta
00078   ) const
00079 {
00080   this->single_scalar_euclidean_apply_impl(M_trans,X,Y,alpha,beta);
00081 }
00082 
00083 // Overridden from MultiVectorBase
00084 
00085 template<class Scalar>
00086 void MPIMultiVectorBase<Scalar>::applyOp(
00087   const RTOpPack::RTOpT<Scalar>   &pri_op
00088   ,const int                      num_multi_vecs
00089   ,const MultiVectorBase<Scalar>* multi_vecs[]
00090   ,const int                      num_targ_multi_vecs
00091   ,MultiVectorBase<Scalar>*       targ_multi_vecs[]
00092   ,RTOpPack::ReductTarget*        reduct_objs[]
00093   ,const Index                    pri_first_ele_in
00094   ,const Index                    pri_sub_dim_in
00095   ,const Index                    pri_global_offset_in
00096   ,const Index                    sec_first_ele_in
00097   ,const Index                    sec_sub_dim_in
00098   ) const
00099 {
00100   using Teuchos::dyn_cast;
00101   using Teuchos::Workspace;
00102   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00103   const Index numCols = this->domain()->dim();
00104   const MPIVectorSpaceBase<Scalar> &mpiSpc = *mpiSpace();
00105 #ifdef _DEBUG
00106   TEST_FOR_EXCEPTION(
00107     in_applyOp_, std::invalid_argument
00108     ,"MPIMultiVectorBase<>::applyOp(...): Error, this method is being entered recursively which is a "
00109     "clear sign that one of the methods getSubMultiVector(...), freeSubMultiVector(...) or commitSubMultiVector(...) "
00110     "was not implemented properly!"
00111     );
00112   apply_op_validate_input(
00113     "MPIMultiVectorBase<>::applyOp(...)", *this->domain(), *this->range()
00114     ,pri_op,num_multi_vecs,multi_vecs,num_targ_multi_vecs,targ_multi_vecs
00115     ,reduct_objs,pri_first_ele_in,pri_sub_dim_in,pri_global_offset_in
00116     ,sec_first_ele_in,sec_sub_dim_in
00117     );
00118 #endif
00119   // Flag that we are in applyOp()
00120   in_applyOp_ = true;
00121   // First see if this is a locally replicated vector in which case
00122   // we treat this as a local operation only.
00123   const bool locallyReplicated = (localSubDim_ == globalDim_);
00124   // Get the overlap in the current process with the input logical sub-vector
00125   // from (first_ele_in,sub_dim_in,global_offset_in)
00126   RTOp_index_type  overlap_first_local_ele  = 0;
00127   RTOp_index_type  overalap_local_sub_dim   = 0;
00128   RTOp_index_type  overlap_global_offset    = 0;
00129   RTOp_parallel_calc_overlap(
00130     globalDim_, localSubDim_, localOffset_, pri_first_ele_in, pri_sub_dim_in, pri_global_offset_in
00131     ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset
00132     );
00133   const Range1D
00134     local_rng = (
00135       overlap_first_local_ele!=0
00136       ? Range1D( localOffset_ + overlap_first_local_ele, localOffset_ + overlap_first_local_ele + overalap_local_sub_dim - 1 )
00137       : Range1D::Invalid
00138       ),
00139     col_rng(
00140       sec_first_ele_in
00141       ,sec_sub_dim_in ? sec_first_ele_in + sec_sub_dim_in - 1 : numCols
00142       );
00143   // Create sub-vector views of all of the *participating* local data
00144   Workspace<RTOpPack::SubMultiVectorT<Scalar> > sub_multi_vecs(wss,num_multi_vecs);
00145   Workspace<RTOpPack::MutableSubMultiVectorT<Scalar> > targ_sub_multi_vecs(wss,num_targ_multi_vecs);
00146   if( overlap_first_local_ele != 0 ) {
00147     for(int k = 0; k < num_multi_vecs; ++k ) {
00148       multi_vecs[k]->getSubMultiVector( local_rng, col_rng, &sub_multi_vecs[k] );
00149       sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00150     }
00151     for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00152       targ_multi_vecs[k]->getSubMultiVector( local_rng, col_rng, &targ_sub_multi_vecs[k] );
00153       targ_sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00154     }
00155   }
00156   // Apply the RTOp operator object (all processors must participate)
00157   RTOpPack::MPI_apply_op(
00158     locallyReplicated ? MPI_COMM_NULL : mpiSpc.mpiComm()                               // comm
00159     ,pri_op                                                                            // op
00160     ,-1                                                                                // root_rank (perform an all-reduce)
00161     ,col_rng.size()                                                                    // num_cols
00162     ,num_multi_vecs                                                                    // num_multi_vecs
00163     ,num_multi_vecs && overlap_first_local_ele ? &sub_multi_vecs[0] : NULL             // sub_multi_vecs
00164     ,num_targ_multi_vecs                                                               // num_targ_multi_vecs
00165     ,num_targ_multi_vecs && overlap_first_local_ele ? &targ_sub_multi_vecs[0] : NULL   // targ_sub_multi_vecs
00166     ,reduct_objs                                                                       // reduct_objs
00167     );
00168   // Free and commit the local data
00169   if( overlap_first_local_ele != 0 ) {
00170     for(int k = 0; k < num_multi_vecs; ++k ) {
00171       sub_multi_vecs[k].setGlobalOffset(local_rng.lbound()-1);
00172       multi_vecs[k]->freeSubMultiVector( &sub_multi_vecs[k] );
00173     }
00174     for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00175       targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound()-1);
00176       targ_multi_vecs[k]->commitSubMultiVector( &targ_sub_multi_vecs[k] );
00177     }
00178   }
00179   // Flag that we are leaving applyOp()
00180   in_applyOp_ = false;
00181 }
00182 
00183 template<class Scalar>
00184 void MPIMultiVectorBase<Scalar>::getSubMultiVector(
00185   const Range1D                       &rowRng_in
00186   ,const Range1D                      &colRng_in
00187   ,RTOpPack::SubMultiVectorT<Scalar>  *sub_mv
00188   ) const
00189 {
00190   const Range1D rowRng = validateRowRange(rowRng_in);
00191   const Range1D colRng = validateColRange(colRng_in);
00192   if( rowRng.lbound() < localOffset_+1 || localOffset_+localSubDim_ < rowRng.ubound() ) {
00193     // rng consists of off-processor elements so use the default implementation!
00194     MultiVectorBase<Scalar>::getSubMultiVector(rowRng_in,colRng_in,sub_mv);
00195     return;
00196   }
00197   // rng consists of all local data so get it!
00198   const Scalar *localValues = NULL;
00199   int leadingDim = 0;
00200   this->getLocalData(&localValues,&leadingDim);
00201   sub_mv->initialize(
00202     rowRng.lbound()-1                             // globalOffset
00203     ,rowRng.size()                                // subDim
00204     ,colRng.lbound()-1                            // colOffset
00205     ,colRng.size()                                // numSubCols
00206     ,localValues
00207     +(rowRng.lbound()-localOffset_-1)
00208     +(colRng.lbound()-1)*leadingDim               // values
00209     ,leadingDim                                   // leadingDim
00210     );
00211 }
00212 
00213 template<class Scalar>
00214 void MPIMultiVectorBase<Scalar>::freeSubMultiVector(
00215   RTOpPack::SubMultiVectorT<Scalar>* sub_mv
00216   ) const
00217 {
00218   if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00219     // Let the default implementation handle it!
00220     MultiVectorBase<Scalar>::freeSubMultiVector(sub_mv);
00221     return;
00222   }
00223   freeLocalData( sub_mv->values() );
00224   sub_mv->set_uninitialized();
00225 }
00226 
00227 template<class Scalar>
00228 void MPIMultiVectorBase<Scalar>::getSubMultiVector(
00229   const Range1D                                &rowRng_in
00230   ,const Range1D                               &colRng_in
00231   ,RTOpPack::MutableSubMultiVectorT<Scalar>    *sub_mv
00232   )
00233 {
00234   const Range1D rowRng = validateRowRange(rowRng_in);
00235   const Range1D colRng = validateColRange(colRng_in);
00236   if( rowRng.lbound() < localOffset_+1 || localOffset_+localSubDim_ < rowRng.ubound() ) {
00237     // rng consists of off-processor elements so use the default implementation!
00238     MultiVectorBase<Scalar>::getSubMultiVector(rowRng_in,colRng_in,sub_mv);
00239     return;
00240   }
00241   // rng consists of all local data so get it!
00242   Scalar *localValues = NULL;
00243   int leadingDim = 0;
00244   this->getLocalData(&localValues,&leadingDim);
00245   sub_mv->initialize(
00246     rowRng.lbound()-1                             // globalOffset
00247     ,rowRng.size()                                // subDim
00248     ,colRng.lbound()-1                            // colOffset
00249     ,colRng.size()                                // numSubCols
00250     ,localValues
00251     +(rowRng.lbound()-localOffset_-1)
00252     +(colRng.lbound()-1)*leadingDim               // values
00253     ,leadingDim                                   // leadingDim
00254     );
00255 }
00256 
00257 template<class Scalar>
00258 void MPIMultiVectorBase<Scalar>::commitSubMultiVector(
00259   RTOpPack::MutableSubMultiVectorT<Scalar>* sub_mv
00260   )
00261 {
00262   if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00263     // Let the default implementation handle it!
00264     MultiVectorBase<Scalar>::commitSubMultiVector(sub_mv);
00265     return;
00266   }
00267   commitLocalData( sub_mv->values() );
00268   sub_mv->set_uninitialized();
00269 }
00270 
00271 // protected
00272 
00273 template<class Scalar>
00274 void MPIMultiVectorBase<Scalar>::euclideanApply(
00275   const ETransp                     M_trans
00276   ,const MultiVectorBase<Scalar>    &X
00277   ,MultiVectorBase<Scalar>          *Y
00278   ,const Scalar                     alpha
00279   ,const Scalar                     beta
00280   ) const
00281 {
00282   typedef Teuchos::ScalarTraits<Scalar> ST;
00283   typedef Teuchos::RawMPITraits<Scalar> RMT;
00284   using Teuchos::Workspace;
00285   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00286 
00287 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00288   Teuchos::Time timerTotal("dummy",true);
00289   Teuchos::Time timer("dummy");
00290 #endif
00291 
00292   //
00293   // This function performs one of two operations.
00294   //
00295   // The first operation (M_trans == NOTRANS) is:
00296   //
00297   //     Y = beta * Y + alpha * M * X
00298   //
00299   // where Y and M have compatible (distributed?) range vector
00300   // spaces and X is a locally replicated serial multi-vector.  This
00301   // operation does not require any global communication.
00302   //
00303   // The second operation (M_trans == TRANS) is:
00304   //
00305   //     Y = beta * Y + alpha * M' * X
00306   //
00307   // where M and X have compatible (distributed?) range vector spaces
00308   // and Y is a locally replicated serial multi-vector.  This operation
00309   // requires a local reduction.
00310   //
00311 
00312   //
00313   // Get spaces and validate compatibility
00314   //
00315 
00316   // Get the MPIVectorSpace
00317   const MPIVectorSpaceBase<Scalar> &mpiSpc = *this->mpiSpace();
00318 
00319   // Get the MPI communicator
00320   MPI_Comm mpiComm = mpiSpc.mpiComm();
00321 #ifdef _DEBUG
00322   const VectorSpaceBase<Scalar>
00323     &Y_range = *Y->range(),
00324     &X_range = *X.range();
00325 //  std::cout << "MPIMultiVectorBase<Scalar>::apply(...): mpiComm = " << mpiComm << std::endl;
00326   TEST_FOR_EXCEPTION(
00327     ( globalDim_ > localSubDim_ ) && mpiComm == MPI_COMM_NULL, std::logic_error
00328     ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00329     );
00330   // ToDo: Write a good general validation function that I can call that will replace
00331   // all of these TEST_FOR_EXCEPTION(...) uses
00332 
00333   TEST_FOR_EXCEPTION(
00334     real_trans(M_trans)==NOTRANS && !mpiSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00335     ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00336     );
00337   TEST_FOR_EXCEPTION(
00338     real_trans(M_trans)==TRANS && !mpiSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00339     ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00340     );
00341 #endif
00342 
00343   //
00344   // Get explicit (local) views of Y, M and X
00345   //
00346 
00347 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00348   timer.start();
00349 #endif
00350     
00351   ExplicitMutableMultiVectorView<Scalar>
00352     Y_local(
00353       *Y
00354       ,real_trans(M_trans)==NOTRANS ? Range1D(localOffset_+1,localOffset_+localSubDim_) : Range1D()
00355       ,Range1D()
00356       );
00357   ExplicitMultiVectorView<Scalar>
00358     M_local(
00359       *this
00360       ,Range1D(localOffset_+1,localOffset_+localSubDim_)
00361       ,Range1D()
00362       );
00363   ExplicitMultiVectorView<Scalar>
00364     X_local(
00365       X
00366       ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_+1,localOffset_+localSubDim_)
00367       ,Range1D()
00368       );
00369 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00370   timer.stop();
00371   std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00372 #endif
00373 #ifdef _DEBUG   
00374   TEST_FOR_EXCEPTION(
00375     real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00376     , Exceptions::IncompatibleVectorSpaces
00377     ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00378     );
00379   TEST_FOR_EXCEPTION(
00380     real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00381     , Exceptions::IncompatibleVectorSpaces
00382     ,"MPIMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00383     );
00384 #endif
00385 
00386   //
00387   // If nonlocal (i.e. M_trans==TRANS) then create temporary storage
00388   // for:
00389   //
00390   //     Y_local_tmp = alpha * M(local) * X(local)                 : on nonroot processes
00391   //
00392   // or
00393   //
00394   //     Y_local_tmp = beta*Y_local + alpha * M(local) * X(local)  : on root process (localOffset_==0)
00395   // 
00396   // and set
00397   //
00398   //     localBeta = ( localOffset_ == 0 ? beta : 0.0 )
00399   //
00400   // Above, we choose localBeta such that we will only perform
00401   // Y_local = beta * Y_local + ...  on one process (the root
00402   // process where localOffset_==0x).  Then, when we add up Y_local
00403   // on all of the processors and we will get the correct result.
00404   //
00405   // If strictly local (i.e. M_trans == NOTRANS) then set:
00406   //
00407   //      Y_local_tmp = Y_local
00408   //      localBeta = beta
00409   //
00410 
00411 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00412   timer.start();
00413 #endif
00414     
00415   Workspace<Scalar> Y_local_tmp_store(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00416   RTOpPack::MutableSubMultiVectorT<Scalar> Y_local_tmp;
00417   Scalar localBeta;
00418   if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00419     // Nonlocal
00420     Y_local_tmp.initialize(
00421       0, Y_local.subDim()
00422       ,0, Y_local.numSubCols()
00423       ,&Y_local_tmp_store[0], Y_local.subDim() // leadingDim == subDim (columns are adjacent)
00424       );
00425     if( localOffset_ == 0 ) {
00426       // Root process: Must copy Y_local into Y_local_tmp
00427       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00428         Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00429         std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00430       }
00431       localBeta = beta;
00432     }
00433     else {
00434       // Not the root process
00435       localBeta = 0.0;
00436     }
00437   }
00438   else {
00439     // Local
00440     Y_local_tmp = Y_local.smv(); // Shallow copy only!
00441     localBeta = beta;
00442   }
00443 
00444 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00445   timer.stop();
00446   std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00447 #endif
00448     
00449   //
00450   // Perform the local multiplication:
00451   //
00452   //     Y(local) = localBeta * Y(local) + alpha * op(M(local)) * X(local)
00453   //
00454   // or in BLAS lingo:
00455   //
00456   //     C        = beta      * C        + alpha * op(A)        * op(B)
00457   //
00458 
00459 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00460   timer.start();
00461 #endif
00462   Teuchos::ETransp t_transp;
00463   if(ST::isComplex) {
00464     switch(M_trans) {
00465       case NOTRANS:   t_transp = Teuchos::NO_TRANS;     break;
00466       case TRANS:     t_transp = Teuchos::TRANS;        break;
00467       case CONJTRANS: t_transp = Teuchos::CONJ_TRANS;   break;
00468       default: TEST_FOR_EXCEPT(true);
00469     }
00470   }
00471   else {
00472     switch(real_trans(M_trans)) {
00473       case NOTRANS:   t_transp = Teuchos::NO_TRANS;     break;
00474       case TRANS:     t_transp = Teuchos::TRANS;        break;
00475       default: TEST_FOR_EXCEPT(true);
00476     }
00477   }
00478   blas_.GEMM(
00479     t_transp                                                                  // TRANSA
00480     ,Teuchos::NO_TRANS                                                        // TRANSB
00481     ,Y_local.subDim()                                                         // M
00482     ,Y_local.numSubCols()                                                     // N
00483     ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim()   // K
00484     ,alpha                                                                    // ALPHA
00485     ,const_cast<Scalar*>(M_local.values())                                    // A
00486     ,M_local.leadingDim()                                                     // LDA
00487     ,const_cast<Scalar*>(X_local.values())                                    // B
00488     ,X_local.leadingDim()                                                     // LDB
00489     ,localBeta                                                                // BETA
00490     ,Y_local_tmp.values()                                                     // C
00491     ,Y_local_tmp.leadingDim()                                                 // LDC
00492     );
00493 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00494   timer.stop();
00495   std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Time for GEMM = " << timer.totalElapsedTime() << " seconds\n";
00496 #endif
00497 
00498 #ifdef RTOp_USE_MPI
00499   if( mpiComm != MPI_COMM_NULL ) {
00500     
00501     //
00502     // Perform the global reduction of Y_local_tmp back into Y_local
00503     //
00504     
00505     if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00506       // Contiguous buffer for final reduction
00507       Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00508       // Perform the reduction
00509       MPI_Allreduce(
00510         Y_local_tmp.values()                           // sendbuff
00511         ,&Y_local_final_buff[0]                        // recvbuff
00512         ,RMT::adjustCount(Y_local_final_buff.size())   // count
00513         ,RMT::type()                                   // datatype
00514         ,RMT::sumOp()                                  // op
00515         ,mpiComm                                       // comm
00516         );
00517       // Load Y_local_final_buff back into Y_local
00518       const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00519       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00520         Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00521         for( int i = 0; i < Y_local.subDim(); ++i ) {
00522           (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00523         }
00524       }
00525     }
00526   }
00527   else {
00528 #endif // RTOp_USE_MPI
00529 
00530     // When you get here the view Y_local will be committed back to Y
00531     // in the destructor to Y_local
00532 
00533 #ifdef RTOp_USE_MPI
00534   }
00535 #endif
00536 
00537 #ifdef THYRA_MPI_MULTI_VECTOR_BASE_PRINT_TIMES
00538   timer.stop();
00539   std::cout << "\nMPIMultiVectorBase<Scalar>::apply(...): Total time = " << timerTotal.totalElapsedTime() << " seconds\n";
00540 #endif
00541 
00542 }
00543 
00544 // Overridden from SingleScalarEuclideanLinearOpBase
00545 
00546 template<class Scalar>
00547 bool MPIMultiVectorBase<Scalar>::opSupported(ETransp M_trans) const
00548 {
00549   typedef Teuchos::ScalarTraits<Scalar> ST;
00550   return ( ST::isComplex ? ( M_trans!=CONJ ) : true );
00551 }
00552 
00553 template<class Scalar>
00554 void MPIMultiVectorBase<Scalar>::updateMpiSpace()
00555 {
00556   if(globalDim_ == 0) {
00557     const MPIVectorSpaceBase<Scalar> *mpiSpace = this->mpiSpace().get();
00558     if(mpiSpace) {
00559       globalDim_    = mpiSpace->dim();
00560       localOffset_  = mpiSpace->localOffset();
00561       localSubDim_  = mpiSpace->localSubDim();
00562       numCols_      = this->domain()->dim();
00563     }
00564     else {
00565       globalDim_    = 0;
00566       localOffset_  = -1;
00567       localSubDim_  = 0;
00568       numCols_      = 0;
00569     }
00570   }
00571 }
00572 
00573 template<class Scalar>
00574 Range1D MPIMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00575 {
00576   const Range1D rowRng = RangePack::full_range(rowRng_in,1,globalDim_);
00577 #ifdef _DEBUG
00578   TEST_FOR_EXCEPTION(
00579     rowRng.lbound() < 1 || globalDim_ < rowRng.ubound(), std::invalid_argument
00580     ,"MPIMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00581     <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00582     "in the range [1,"<<globalDim_<<"]!"
00583     );
00584 #endif
00585   return rowRng;
00586 }
00587 
00588 template<class Scalar>
00589 Range1D MPIMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00590 {
00591   const Range1D colRng = RangePack::full_range(colRng_in,1,numCols_);
00592 #ifdef _DEBUG
00593   TEST_FOR_EXCEPTION(
00594     colRng.lbound() < 1 || numCols_ < colRng.ubound(), std::invalid_argument
00595     ,"MPIMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00596     <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00597     "in the range [1,"<<numCols_<<"]!"
00598     );
00599 #endif
00600   return colRng;
00601 }
00602 
00603 } // end namespace Thyra
00604 
00605 #endif // THYRA_MPI_MULTI_VECTOR_BASE_HPP

Generated on Thu Sep 18 12:39:52 2008 for Thyra ANA Operator/VectorBase Interfaces and Related Software by doxygen 1.3.9.1