Thyra_SpmdMultiVectorBase.hpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //    Thyra: Interfaces and Support for Abstract Numerical Algorithms
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #ifndef THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00030 #define THYRA_SPMD_MULTI_VECTOR_BASE_HPP
00031 
00032 #include "Thyra_SpmdMultiVectorBaseDecl.hpp"
00033 #include "Thyra_MultiVectorDefaultBase.hpp"
00034 #include "Thyra_SingleScalarEuclideanLinearOpBase.hpp"
00035 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
00036 #include "Thyra_DetachedMultiVectorView.hpp"
00037 #include "RTOpPack_SPMD_apply_op.hpp"
00038 #include "RTOp_parallel_helpers.h"
00039 #include "Teuchos_Workspace.hpp"
00040 #include "Teuchos_dyn_cast.hpp"
00041 #include "Teuchos_Time.hpp"
00042 #include "Teuchos_CommHelpers.hpp"
00043 
00044 // Define to see some timing output!
00045 //#define THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00046 
00047 namespace Thyra {
00048 
00049 template<class Scalar>
00050 SpmdMultiVectorBase<Scalar>::SpmdMultiVectorBase()
00051   :in_applyOp_(false)
00052   ,globalDim_(0)
00053   ,localOffset_(-1)
00054   ,localSubDim_(0)
00055   ,numCols_(0)
00056 {}
00057 
00058 // Overridden from EuclideanLinearOpBase
00059 
00060 template<class Scalar>
00061 Teuchos::RefCountPtr< const ScalarProdVectorSpaceBase<Scalar> >
00062 SpmdMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00063 {
00064   return Teuchos::rcp_dynamic_cast<const ScalarProdVectorSpaceBase<Scalar> >(
00065     spmdSpace(),true
00066     );
00067 }
00068 
00069 // Overridden from LinearOpBase
00070 
00071 template<class Scalar>
00072 void SpmdMultiVectorBase<Scalar>::apply(
00073   const ETransp                     M_trans
00074   ,const MultiVectorBase<Scalar>    &X
00075   ,MultiVectorBase<Scalar>          *Y
00076   ,const Scalar                     alpha
00077   ,const Scalar                     beta
00078   ) const
00079 {
00080   this->single_scalar_euclidean_apply_impl(M_trans,X,Y,alpha,beta);
00081 }
00082 
00083 // Overridden from MultiVectorBase
00084 
00085 template<class Scalar>
00086 void SpmdMultiVectorBase<Scalar>::applyOp(
00087   const RTOpPack::RTOpT<Scalar>         &pri_op
00088   ,const int                            num_multi_vecs
00089   ,const MultiVectorBase<Scalar>*const  multi_vecs[]
00090   ,const int                            num_targ_multi_vecs
00091   ,MultiVectorBase<Scalar>*const        targ_multi_vecs[]
00092   ,RTOpPack::ReductTarget*const         reduct_objs[]
00093   ,const Index                          pri_first_ele_offset_in
00094   ,const Index                          pri_sub_dim_in
00095   ,const Index                          pri_global_offset_in
00096   ,const Index                          sec_first_ele_offset_in
00097   ,const Index                          sec_sub_dim_in
00098   ) const
00099 {
00100   using Teuchos::dyn_cast;
00101   using Teuchos::Workspace;
00102   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00103   const Index numCols = this->domain()->dim();
00104   const SpmdVectorSpaceBase<Scalar> &spmdSpc = *spmdSpace();
00105 #ifdef TEUCHOS_DEBUG
00106   TEST_FOR_EXCEPTION(
00107     in_applyOp_, std::invalid_argument
00108     ,"SpmdMultiVectorBase<>::applyOp(...): Error, this method is being entered recursively which is a "
00109     "clear sign that one of the methods acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...) "
00110     "was not implemented properly!"
00111     );
00112   apply_op_validate_input(
00113     "SpmdMultiVectorBase<>::applyOp(...)", *this->domain(), *this->range()
00114     ,pri_op,num_multi_vecs,multi_vecs,num_targ_multi_vecs,targ_multi_vecs
00115     ,reduct_objs,pri_first_ele_offset_in,pri_sub_dim_in,pri_global_offset_in
00116     ,sec_first_ele_offset_in,sec_sub_dim_in
00117     );
00118 #endif
00119   // Flag that we are in applyOp()
00120   in_applyOp_ = true;
00121   // First see if this is a locally replicated vector in which case
00122   // we treat this as a local operation only.
00123   const bool locallyReplicated = (localSubDim_ == globalDim_);
00124   // Get the overlap in the current process with the input logical sub-vector
00125   // from (first_ele_offset_in,sub_dim_in,global_offset_in)
00126   Teuchos_Index  overlap_first_local_ele_off  = 0;
00127   Teuchos_Index  overlap_local_sub_dim        = 0;
00128   Teuchos_Index  overlap_global_offset        = 0;
00129   RTOp_parallel_calc_overlap(
00130     globalDim_, localSubDim_, localOffset_, pri_first_ele_offset_in, pri_sub_dim_in, pri_global_offset_in
00131     ,&overlap_first_local_ele_off, &overlap_local_sub_dim, &overlap_global_offset
00132     );
00133   const Range1D
00134     local_rng = (
00135       overlap_first_local_ele_off>=0
00136       ? Range1D( localOffset_+overlap_first_local_ele_off, localOffset_+overlap_first_local_ele_off+overlap_local_sub_dim-1 )
00137       : Range1D::Invalid
00138       ),
00139     col_rng(
00140       sec_first_ele_offset_in
00141       ,sec_sub_dim_in >= 0 ? sec_first_ele_offset_in+sec_sub_dim_in-1 : numCols-1
00142       );
00143   // Create sub-vector views of all of the *participating* local data
00144   Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> > sub_multi_vecs(wss,num_multi_vecs);
00145   Workspace<RTOpPack::SubMultiVectorView<Scalar> > targ_sub_multi_vecs(wss,num_targ_multi_vecs);
00146   if( overlap_first_local_ele_off >= 0 ) {
00147     for(int k = 0; k < num_multi_vecs; ++k ) {
00148       multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &sub_multi_vecs[k] );
00149       sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00150     }
00151     for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00152       targ_multi_vecs[k]->acquireDetachedView( local_rng, col_rng, &targ_sub_multi_vecs[k] );
00153       targ_sub_multi_vecs[k].setGlobalOffset( overlap_global_offset );
00154     }
00155   }
00156   // Apply the RTOp operator object (all processors must participate)
00157   RTOpPack::SPMD_apply_op(
00158     locallyReplicated ? NULL : spmdSpc.getComm().get()                                 // comm
00159     ,pri_op                                                                            // op
00160     ,col_rng.size()                                                                    // num_cols
00161     ,num_multi_vecs                                                                    // num_multi_vecs
00162     ,num_multi_vecs && overlap_first_local_ele_off>=0 ? &sub_multi_vecs[0] : NULL      // sub_multi_vecs
00163     ,num_targ_multi_vecs                                                               // num_targ_multi_vecs
00164     ,num_targ_multi_vecs && overlap_first_local_ele_off>=0 ? &targ_sub_multi_vecs[0] : NULL// targ_sub_multi_vecs
00165     ,reduct_objs                                                                       // reduct_objs
00166     );
00167   // Free and commit the local data
00168   if( overlap_first_local_ele_off >= 0 ) {
00169     for(int k = 0; k < num_multi_vecs; ++k ) {
00170       sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00171       multi_vecs[k]->releaseDetachedView( &sub_multi_vecs[k] );
00172     }
00173     for(int k = 0; k < num_targ_multi_vecs; ++k ) {
00174       targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00175       targ_multi_vecs[k]->commitDetachedView( &targ_sub_multi_vecs[k] );
00176     }
00177   }
00178   // Flag that we are leaving applyOp()
00179   in_applyOp_ = false;
00180 }
00181 
00182 template<class Scalar>
00183 void SpmdMultiVectorBase<Scalar>::acquireDetachedView(
00184   const Range1D                       &rowRng_in
00185   ,const Range1D                      &colRng_in
00186   ,RTOpPack::ConstSubMultiVectorView<Scalar>  *sub_mv
00187   ) const
00188 {
00189   const Range1D rowRng = validateRowRange(rowRng_in);
00190   const Range1D colRng = validateColRange(colRng_in);
00191   if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00192     // rng consists of off-processor elements so use the default implementation!
00193     MultiVectorDefaultBase<Scalar>::acquireDetachedView(rowRng_in,colRng_in,sub_mv);
00194     return;
00195   }
00196   // rng consists of all local data so get it!
00197   const Scalar *localValues = NULL;
00198   int leadingDim = 0;
00199   this->getLocalData(&localValues,&leadingDim);
00200   sub_mv->initialize(
00201     rowRng.lbound()                               // globalOffset
00202     ,rowRng.size()                                // subDim
00203     ,colRng.lbound()                              // colOffset
00204     ,colRng.size()                                // numSubCols
00205     ,localValues
00206     +(rowRng.lbound()-localOffset_)
00207     +colRng.lbound()*leadingDim                   // values
00208     ,leadingDim                                   // leadingDim
00209     );
00210 }
00211 
00212 template<class Scalar>
00213 void SpmdMultiVectorBase<Scalar>::releaseDetachedView(
00214   RTOpPack::ConstSubMultiVectorView<Scalar>* sub_mv
00215   ) const
00216 {
00217   if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00218     // Let the default implementation handle it!
00219     MultiVectorDefaultBase<Scalar>::releaseDetachedView(sub_mv);
00220     return;
00221   }
00222   freeLocalData( sub_mv->values() );
00223   sub_mv->set_uninitialized();
00224 }
00225 
00226 template<class Scalar>
00227 void SpmdMultiVectorBase<Scalar>::acquireDetachedView(
00228   const Range1D                                &rowRng_in
00229   ,const Range1D                               &colRng_in
00230   ,RTOpPack::SubMultiVectorView<Scalar>    *sub_mv
00231   )
00232 {
00233   const Range1D rowRng = validateRowRange(rowRng_in);
00234   const Range1D colRng = validateColRange(colRng_in);
00235   if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00236     // rng consists of off-processor elements so use the default implementation!
00237     MultiVectorDefaultBase<Scalar>::acquireDetachedView(rowRng_in,colRng_in,sub_mv);
00238     return;
00239   }
00240   // rng consists of all local data so get it!
00241   Scalar *localValues = NULL;
00242   int leadingDim = 0;
00243   this->getLocalData(&localValues,&leadingDim);
00244   sub_mv->initialize(
00245     rowRng.lbound()                               // globalOffset
00246     ,rowRng.size()                                // subDim
00247     ,colRng.lbound()                              // colOffset
00248     ,colRng.size()                                // numSubCols
00249     ,localValues
00250     +(rowRng.lbound()-localOffset_)
00251     +colRng.lbound()*leadingDim                   // values
00252     ,leadingDim                                   // leadingDim
00253     );
00254 }
00255 
00256 template<class Scalar>
00257 void SpmdMultiVectorBase<Scalar>::commitDetachedView(
00258   RTOpPack::SubMultiVectorView<Scalar>* sub_mv
00259   )
00260 {
00261   if( sub_mv->globalOffset() < localOffset_ || localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim() ) {
00262     // Let the default implementation handle it!
00263     MultiVectorDefaultBase<Scalar>::commitDetachedView(sub_mv);
00264     return;
00265   }
00266   commitLocalData( sub_mv->values() );
00267   sub_mv->set_uninitialized();
00268 }
00269 
00270 // protected
00271 
00272 template<class Scalar>
00273 void SpmdMultiVectorBase<Scalar>::euclideanApply(
00274   const ETransp                     M_trans
00275   ,const MultiVectorBase<Scalar>    &X
00276   ,MultiVectorBase<Scalar>          *Y
00277   ,const Scalar                     alpha
00278   ,const Scalar                     beta
00279   ) const
00280 {
00281   typedef Teuchos::ScalarTraits<Scalar> ST;
00282   using Teuchos::Workspace;
00283   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00284 
00285 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00286   Teuchos::Time timerTotal("dummy",true);
00287   Teuchos::Time timer("dummy");
00288 #endif
00289 
00290   //
00291   // This function performs one of two operations.
00292   //
00293   // The first operation (M_trans == NOTRANS) is:
00294   //
00295   //     Y = beta * Y + alpha * M * X
00296   //
00297   // where Y and M have compatible (distributed?) range vector
00298   // spaces and X is a locally replicated serial multi-vector.  This
00299   // operation does not require any global communication.
00300   //
00301   // The second operation (M_trans == TRANS) is:
00302   //
00303   //     Y = beta * Y + alpha * M' * X
00304   //
00305   // where M and X have compatible (distributed?) range vector spaces
00306   // and Y is a locally replicated serial multi-vector.  This operation
00307   // requires a local reduction.
00308   //
00309 
00310   //
00311   // Get spaces and validate compatibility
00312   //
00313 
00314   // Get the SpmdVectorSpace
00315   const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
00316 
00317   // Get the Spmd communicator
00318   const Teuchos::RefCountPtr<const Teuchos::Comm<Index> >
00319     comm = spmdSpc.getComm();
00320 #ifdef TEUCHOS_DEBUG
00321   const VectorSpaceBase<Scalar>
00322     &Y_range = *Y->range(),
00323     &X_range = *X.range();
00324 //  std::cout << "SpmdMultiVectorBase<Scalar>::apply(...): comm = " << comm << std::endl;
00325   TEST_FOR_EXCEPTION(
00326     ( globalDim_ > localSubDim_ ) && comm.get()==NULL, std::logic_error
00327     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00328     );
00329   // ToDo: Write a good general validation function that I can call that will replace
00330   // all of these TEST_FOR_EXCEPTION(...) uses
00331 
00332   TEST_FOR_EXCEPTION(
00333     real_trans(M_trans)==NOTRANS && !spmdSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00334     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00335     );
00336   TEST_FOR_EXCEPTION(
00337     real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00338     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00339     );
00340 #endif
00341 
00342   //
00343   // Get explicit (local) views of Y, M and X
00344   //
00345 
00346 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00347   timer.start();
00348 #endif
00349     
00350   DetachedMultiVectorView<Scalar>
00351     Y_local(
00352       *Y
00353       ,real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D()
00354       ,Range1D()
00355       );
00356   ConstDetachedMultiVectorView<Scalar>
00357     M_local(
00358       *this
00359       ,Range1D(localOffset_,localOffset_+localSubDim_-1)
00360       ,Range1D()
00361       );
00362   ConstDetachedMultiVectorView<Scalar>
00363     X_local(
00364       X
00365       ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
00366       ,Range1D()
00367       );
00368 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00369   timer.stop();
00370   std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00371 #endif
00372 #ifdef TEUCHOS_DEBUG    
00373   TEST_FOR_EXCEPTION(
00374     real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00375     , Exceptions::IncompatibleVectorSpaces
00376     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00377     );
00378   TEST_FOR_EXCEPTION(
00379     real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00380     , Exceptions::IncompatibleVectorSpaces
00381     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00382     );
00383 #endif
00384 
00385   //
00386   // If nonlocal (i.e. M_trans==TRANS) then create temporary storage
00387   // for:
00388   //
00389   //     Y_local_tmp = alpha * M(local) * X(local)                 : on nonroot processes
00390   //
00391   // or
00392   //
00393   //     Y_local_tmp = beta*Y_local + alpha * M(local) * X(local)  : on root process (localOffset_==0)
00394   // 
00395   // and set
00396   //
00397   //     localBeta = ( localOffset_ == 0 ? beta : 0.0 )
00398   //
00399   // Above, we choose localBeta such that we will only perform
00400   // Y_local = beta * Y_local + ...  on one process (the root
00401   // process where localOffset_==0x).  Then, when we add up Y_local
00402   // on all of the processors and we will get the correct result.
00403   //
00404   // If strictly local (i.e. M_trans == NOTRANS) then set:
00405   //
00406   //      Y_local_tmp = Y_local
00407   //      localBeta = beta
00408   //
00409 
00410 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00411   timer.start();
00412 #endif
00413     
00414   Workspace<Scalar> Y_local_tmp_store(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00415   RTOpPack::SubMultiVectorView<Scalar> Y_local_tmp;
00416   Scalar localBeta;
00417   if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00418     // Nonlocal
00419     Y_local_tmp.initialize(
00420       0, Y_local.subDim()
00421       ,0, Y_local.numSubCols()
00422       ,&Y_local_tmp_store[0], Y_local.subDim() // leadingDim == subDim (columns are adjacent)
00423       );
00424     if( localOffset_ == 0 ) {
00425       // Root process: Must copy Y_local into Y_local_tmp
00426       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00427         Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00428         std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00429       }
00430       localBeta = beta;
00431     }
00432     else {
00433       // Not the root process
00434       localBeta = 0.0;
00435     }
00436   }
00437   else {
00438     // Local
00439     Y_local_tmp = Y_local.smv(); // Shallow copy only!
00440     localBeta = beta;
00441   }
00442 
00443 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00444   timer.stop();
00445   std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00446 #endif
00447     
00448   //
00449   // Perform the local multiplication:
00450   //
00451   //     Y(local) = localBeta * Y(local) + alpha * op(M(local)) * X(local)
00452   //
00453   // or in BLAS lingo:
00454   //
00455   //     C        = beta      * C        + alpha * op(A)        * op(B)
00456   //
00457 
00458 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00459   timer.start();
00460 #endif
00461   Teuchos::ETransp t_transp;
00462   if(ST::isComplex) {
00463     switch(M_trans) {
00464       case NOTRANS:   t_transp = Teuchos::NO_TRANS;     break;
00465       case TRANS:     t_transp = Teuchos::TRANS;        break;
00466       case CONJTRANS: t_transp = Teuchos::CONJ_TRANS;   break;
00467       default: TEST_FOR_EXCEPT(true);
00468     }
00469   }
00470   else {
00471     switch(real_trans(M_trans)) {
00472       case NOTRANS:   t_transp = Teuchos::NO_TRANS;     break;
00473       case TRANS:     t_transp = Teuchos::TRANS;        break;
00474       default: TEST_FOR_EXCEPT(true);
00475     }
00476   }
00477   blas_.GEMM(
00478     t_transp                                                                  // TRANSA
00479     ,Teuchos::NO_TRANS                                                        // TRANSB
00480     ,Y_local.subDim()                                                         // M
00481     ,Y_local.numSubCols()                                                     // N
00482     ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim()   // K
00483     ,alpha                                                                    // ALPHA
00484     ,const_cast<Scalar*>(M_local.values())                                    // A
00485     ,M_local.leadingDim()                                                     // LDA
00486     ,const_cast<Scalar*>(X_local.values())                                    // B
00487     ,X_local.leadingDim()                                                     // LDB
00488     ,localBeta                                                                // BETA
00489     ,Y_local_tmp.values()                                                     // C
00490     ,Y_local_tmp.leadingDim()                                                 // LDC
00491     );
00492 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00493   timer.stop();
00494   std::cout
00495     << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for GEMM = "
00496     << timer.totalElapsedTime() << " seconds\n";
00497 #endif
00498 
00499   if( comm.get() ) {
00500     
00501     //
00502     // Perform the global reduction of Y_local_tmp back into Y_local
00503     //
00504     
00505     if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00506       // Contiguous buffer for final reduction
00507       Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00508       // Perform the reduction
00509       Teuchos::reduceAll<Index,Scalar>(
00510         *comm,Teuchos::REDUCE_SUM,Y_local_final_buff.size(),Y_local_tmp.values()
00511         ,&Y_local_final_buff[0]
00512         );
00513       // Load Y_local_final_buff back into Y_local
00514       const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00515       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00516         Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00517         for( int i = 0; i < Y_local.subDim(); ++i ) {
00518           (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00519         }
00520       }
00521     }
00522   }
00523   else {
00524 
00525     // When you get here the view Y_local will be committed back to Y
00526     // in the destructor to Y_local
00527 
00528   }
00529 
00530 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00531   timer.stop();
00532   std::cout 
00533     << "\nSpmdMultiVectorBase<Scalar>::apply(...): Total time = "
00534     << timerTotal.totalElapsedTime() << " seconds\n";
00535 #endif
00536 
00537 }
00538 
00539 // Overridden from SingleScalarEuclideanLinearOpBase
00540 
00541 template<class Scalar>
00542 bool SpmdMultiVectorBase<Scalar>::opSupported(ETransp M_trans) const
00543 {
00544   typedef Teuchos::ScalarTraits<Scalar> ST;
00545   return ( ST::isComplex ? ( M_trans!=CONJ ) : true );
00546 }
00547 
00548 template<class Scalar>
00549 void SpmdMultiVectorBase<Scalar>::updateSpmdSpace()
00550 {
00551   if(globalDim_ == 0) {
00552     const SpmdVectorSpaceBase<Scalar> *spmdSpace = this->spmdSpace().get();
00553     if(spmdSpace) {
00554       globalDim_    = spmdSpace->dim();
00555       localOffset_  = spmdSpace->localOffset();
00556       localSubDim_  = spmdSpace->localSubDim();
00557       numCols_      = this->domain()->dim();
00558     }
00559     else {
00560       globalDim_    = 0;
00561       localOffset_  = -1;
00562       localSubDim_  = 0;
00563       numCols_      = 0;
00564     }
00565   }
00566 }
00567 
00568 template<class Scalar>
00569 Range1D SpmdMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00570 {
00571   const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
00572 #ifdef TEUCHOS_DEBUG
00573   TEST_FOR_EXCEPTION(
00574     !( 0 <= rowRng.lbound() && rowRng.ubound() < globalDim_ ), std::invalid_argument
00575     ,"SpmdMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00576     <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00577     "in the range [0,"<<(globalDim_-1)<<"]!"
00578     );
00579 #endif
00580   return rowRng;
00581 }
00582 
00583 template<class Scalar>
00584 Range1D SpmdMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00585 {
00586   const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
00587 #ifdef TEUCHOS_DEBUG
00588   TEST_FOR_EXCEPTION(
00589     !(0 <= colRng.lbound() && colRng.ubound() < numCols_), std::invalid_argument
00590     ,"SpmdMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00591     <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00592     "in the range [0,"<<(numCols_-1)<<"]!"
00593     );
00594 #endif
00595   return colRng;
00596 }
00597 
00598 } // end namespace Thyra
00599 
00600 #endif // THYRA_SPMD_MULTI_VECTOR_BASE_HPP

Generated on Thu Sep 18 12:33:03 2008 for Thyra Package Browser (Single Doxygen Collection) by doxygen 1.3.9.1