Thyra Version of the Day
Thyra_SpmdMultiVectorBase_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //    Thyra: Interfaces and Support for Abstract Numerical Algorithms
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Roscoe A. Bartlett (bartlettra@ornl.gov) 
00038 // 
00039 // ***********************************************************************
00040 // @HEADER
00041 
00042 #ifndef THYRA_SPMD_MULTI_VECTOR_BASE_DEF_HPP
00043 #define THYRA_SPMD_MULTI_VECTOR_BASE_DEF_HPP
00044 
00045 
00046 #include "Thyra_SpmdMultiVectorBase_decl.hpp"
00047 #include "Thyra_MultiVectorDefaultBase.hpp"
00048 #include "Thyra_MultiVectorAdapterBase.hpp"
00049 #include "Thyra_SpmdVectorSpaceDefaultBase.hpp"
00050 #include "Thyra_DetachedMultiVectorView.hpp"
00051 #include "Thyra_apply_op_helper.hpp"
00052 #include "RTOpPack_SPMD_apply_op.hpp"
00053 #include "RTOp_parallel_helpers.h"
00054 #include "Teuchos_Workspace.hpp"
00055 #include "Teuchos_dyn_cast.hpp"
00056 #include "Teuchos_Time.hpp"
00057 #include "Teuchos_CommHelpers.hpp"
00058 
00059 
00060 // Define to see some timing output!
00061 //#define THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00062 
00063 
00064 namespace Thyra {
00065 
00066 
00067 template<class Scalar>
00068 SpmdMultiVectorBase<Scalar>::SpmdMultiVectorBase()
00069   :in_applyOp_(false),
00070    globalDim_(0),
00071    localOffset_(-1),
00072    localSubDim_(0),
00073    numCols_(0)
00074 {}
00075 
00076 
00077 // Overridden public functions from MultiVectorAdapterBase
00078 
00079 
00080 template<class Scalar>
00081 RCP< const ScalarProdVectorSpaceBase<Scalar> >
00082 SpmdMultiVectorBase<Scalar>::rangeScalarProdVecSpc() const
00083 {
00084   return Teuchos::rcp_dynamic_cast<const ScalarProdVectorSpaceBase<Scalar> >(
00085     spmdSpace(), true
00086     );
00087 }
00088 
00089 
00090 // Protected functions overridden from MultiVectorBase
00091 
00092 
00093 template<class Scalar>
00094 void SpmdMultiVectorBase<Scalar>::mvMultiReductApplyOpImpl(
00095   const RTOpPack::RTOpT<Scalar> &pri_op,
00096   const ArrayView<const Ptr<const MultiVectorBase<Scalar> > > &multi_vecs,
00097   const ArrayView<const Ptr<MultiVectorBase<Scalar> > > &targ_multi_vecs,
00098   const ArrayView<const Ptr<RTOpPack::ReductTarget> > &reduct_objs,
00099   const Ordinal pri_global_offset_in
00100   ) const
00101 {
00102 
00103   using Teuchos::dyn_cast;
00104   using Teuchos::Workspace;
00105 
00106   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00107 
00108   const Ordinal numCols = this->domain()->dim();
00109   const SpmdVectorSpaceBase<Scalar> &spmdSpc = *spmdSpace();
00110 
00111 #ifdef TEUCHOS_DEBUG
00112   TEUCHOS_TEST_FOR_EXCEPTION(
00113     in_applyOp_, std::invalid_argument,
00114     "SpmdMultiVectorBase<>::mvMultiReductApplyOpImpl(...): Error, this method is"
00115     " being entered recursively which is a clear sign that one of the methods"
00116     " acquireDetachedView(...), releaseDetachedView(...) or commitDetachedView(...)"
00117     " was not implemented properly!"
00118     );
00119   apply_op_validate_input(
00120     "SpmdMultiVectorBase<>::mvMultiReductApplyOpImpl(...)", *this->domain(),
00121     *this->range(), pri_op, multi_vecs, targ_multi_vecs, reduct_objs,
00122     pri_global_offset_in);
00123 #endif
00124 
00125   // Flag that we are in applyOp()
00126   in_applyOp_ = true;
00127 
00128   // First see if this is a locally replicated vector in which case
00129   // we treat this as a local operation only.
00130   const bool locallyReplicated = (localSubDim_ == globalDim_);
00131 
00132   const Range1D local_rng(localOffset_, localOffset_+localSubDim_-1);
00133   const Range1D col_rng(0, numCols-1);
00134 
00135   // Create sub-vector views of all of the *participating* local data
00136   Workspace<RTOpPack::ConstSubMultiVectorView<Scalar> >
00137     sub_multi_vecs(wss,multi_vecs.size());
00138   Workspace<RTOpPack::SubMultiVectorView<Scalar> >
00139     targ_sub_multi_vecs(wss,targ_multi_vecs.size());
00140   for(int k = 0; k < multi_vecs.size(); ++k ) {
00141     multi_vecs[k]->acquireDetachedView(local_rng, col_rng, &sub_multi_vecs[k]);
00142     sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
00143   }
00144   for(int k = 0; k < targ_multi_vecs.size(); ++k ) {
00145     targ_multi_vecs[k]->acquireDetachedView(local_rng, col_rng, &targ_sub_multi_vecs[k]);
00146     targ_sub_multi_vecs[k].setGlobalOffset(localOffset_+pri_global_offset_in);
00147   }
00148   Workspace<RTOpPack::ReductTarget*> reduct_objs_ptr(wss, reduct_objs.size());
00149   for (int k = 0; k < reduct_objs.size(); ++k) {
00150     reduct_objs_ptr[k] = &*reduct_objs[k];
00151   }
00152 
00153   // Apply the RTOp operator object (all processors must participate)
00154   RTOpPack::SPMD_apply_op(
00155     locallyReplicated ? NULL : spmdSpc.getComm().get(), // comm
00156     pri_op, // op
00157     col_rng.size(), // num_cols
00158     multi_vecs.size(), // multi_vecs.size()
00159     multi_vecs.size() ? &sub_multi_vecs[0] : NULL, // sub_multi_vecs
00160     targ_multi_vecs.size(), // targ_multi_vecs.size()
00161     targ_multi_vecs.size() ? &targ_sub_multi_vecs[0] : NULL, // targ_sub_multi_vecs
00162     reduct_objs.size() ? &reduct_objs_ptr[0] : 0 // reduct_objs
00163     );
00164 
00165   // Free and commit the local data
00166   for(int k = 0; k < multi_vecs.size(); ++k ) {
00167     sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00168     multi_vecs[k]->releaseDetachedView( &sub_multi_vecs[k] );
00169   }
00170   for(int k = 0; k < targ_multi_vecs.size(); ++k ) {
00171     targ_sub_multi_vecs[k].setGlobalOffset(local_rng.lbound());
00172     targ_multi_vecs[k]->commitDetachedView( &targ_sub_multi_vecs[k] );
00173   }
00174 
00175   // Flag that we are leaving applyOp()
00176   in_applyOp_ = false;
00177 
00178 }
00179 
00180 
00181 template<class Scalar>
00182 void SpmdMultiVectorBase<Scalar>::acquireDetachedMultiVectorViewImpl(
00183   const Range1D &rowRng_in,
00184   const Range1D &colRng_in,
00185   RTOpPack::ConstSubMultiVectorView<Scalar> *sub_mv
00186   ) const
00187 {
00188   using Teuchos::outArg;
00189   const Range1D rowRng = validateRowRange(rowRng_in);
00190   const Range1D colRng = validateColRange(colRng_in);
00191   if( rowRng.lbound() < localOffset_ || localOffset_+localSubDim_-1 < rowRng.ubound() ) {
00192     // rng consists of off-processor elements so use the default implementation!
00193     MultiVectorDefaultBase<Scalar>::acquireDetachedMultiVectorViewImpl(
00194       rowRng_in,colRng_in,sub_mv
00195       );
00196     return;
00197   }
00198   ArrayRCP<const Scalar> localValues;
00199   Ordinal leadingDim = 0;
00200   this->getLocalData(outArg(localValues), outArg(leadingDim));
00201   sub_mv->initialize(
00202     rowRng.lbound(), // globalOffset
00203     rowRng.size(), // subDim
00204     colRng.lbound(), // colOffset
00205     colRng.size(), // numSubCols
00206     localValues
00207     +(rowRng.lbound()-localOffset_)
00208     +colRng.lbound()*leadingDim, // values
00209     leadingDim // leadingDim
00210     );
00211 }
00212 
00213 
00214 template<class Scalar>
00215 void SpmdMultiVectorBase<Scalar>::releaseDetachedMultiVectorViewImpl(
00216   RTOpPack::ConstSubMultiVectorView<Scalar>* sub_mv
00217   ) const
00218 {
00219   if(
00220     sub_mv->globalOffset() < localOffset_ 
00221     ||
00222     localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
00223     )
00224   {
00225     // Let the default implementation handle it!
00226     MultiVectorDefaultBase<Scalar>::releaseDetachedMultiVectorViewImpl(sub_mv);
00227     return;
00228   }
00229   sub_mv->uninitialize();
00230 }
00231 
00232 
00233 template<class Scalar>
00234 void SpmdMultiVectorBase<Scalar>::acquireNonconstDetachedMultiVectorViewImpl(
00235   const Range1D &rowRng_in,
00236   const Range1D &colRng_in,
00237   RTOpPack::SubMultiVectorView<Scalar> *sub_mv
00238   )
00239 {
00240   using Teuchos::outArg;
00241   const Range1D rowRng = validateRowRange(rowRng_in);
00242   const Range1D colRng = validateColRange(colRng_in);
00243   if(
00244     rowRng.lbound() < localOffset_
00245     ||
00246     localOffset_+localSubDim_-1 < rowRng.ubound()
00247     )
00248   {
00249     // rng consists of off-processor elements so use the default implementation!
00250     MultiVectorDefaultBase<Scalar>::acquireNonconstDetachedMultiVectorViewImpl(
00251       rowRng_in, colRng_in, sub_mv
00252       );
00253     return;
00254   }
00255   ArrayRCP<Scalar> localValues;
00256   Ordinal leadingDim = 0;
00257   this->getNonconstLocalData(outArg(localValues), outArg(leadingDim));
00258   sub_mv->initialize(
00259     rowRng.lbound() // globalOffset
00260     ,rowRng.size() // subDim
00261     ,colRng.lbound() // colOffset
00262     ,colRng.size() // numSubCols
00263     ,localValues
00264     +(rowRng.lbound()-localOffset_)
00265     +colRng.lbound()*leadingDim // values
00266     ,leadingDim // leadingDim
00267     );
00268 }
00269 
00270 
00271 template<class Scalar>
00272 void SpmdMultiVectorBase<Scalar>::commitNonconstDetachedMultiVectorViewImpl(
00273   RTOpPack::SubMultiVectorView<Scalar>* sub_mv
00274   )
00275 {
00276   if(
00277     sub_mv->globalOffset() < localOffset_
00278     ||
00279     localOffset_+localSubDim_ < sub_mv->globalOffset()+sub_mv->subDim()
00280     )
00281   {
00282     // Let the default implementation handle it!
00283     MultiVectorDefaultBase<Scalar>::commitNonconstDetachedMultiVectorViewImpl(sub_mv);
00284     return;
00285   }
00286   sub_mv->uninitialize();
00287 }
00288 
00289 
00290 // Protected functions overridden from MultiVectorAdapterBase
00291 
00292 
00293 template<class Scalar>
00294 void SpmdMultiVectorBase<Scalar>::euclideanApply(
00295   const EOpTransp M_trans,
00296   const MultiVectorBase<Scalar> &X,
00297   const Ptr<MultiVectorBase<Scalar> > &Y,
00298   const Scalar alpha,
00299   const Scalar beta
00300   ) const
00301 {
00302   typedef Teuchos::ScalarTraits<Scalar> ST;
00303   using Teuchos::Workspace;
00304   Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
00305 
00306 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00307   Teuchos::Time timerTotal("dummy",true);
00308   Teuchos::Time timer("dummy");
00309 #endif
00310 
00311   //
00312   // This function performs one of two operations.
00313   //
00314   // The first operation (M_trans == NOTRANS) is:
00315   //
00316   // Y = beta * Y + alpha * M * X
00317   //
00318   // where Y and M have compatible (distributed?) range vector
00319   // spaces and X is a locally replicated serial multi-vector. This
00320   // operation does not require any global communication.
00321   //
00322   // The second operation (M_trans == TRANS) is:
00323   //
00324   // Y = beta * Y + alpha * M' * X
00325   //
00326   // where M and X have compatible (distributed?) range vector spaces
00327   // and Y is a locally replicated serial multi-vector. This operation
00328   // requires a local reduction.
00329   //
00330 
00331   //
00332   // Get spaces and validate compatibility
00333   //
00334 
00335   // Get the SpmdVectorSpace
00336   const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();
00337 
00338   // Get the Spmd communicator
00339   const RCP<const Teuchos::Comm<Ordinal> >
00340     comm = spmdSpc.getComm();
00341 #ifdef TEUCHOS_DEBUG
00342   const VectorSpaceBase<Scalar>
00343     &Y_range = *Y->range(),
00344     &X_range = *X.range();
00345 //  std::cout << "SpmdMultiVectorBase<Scalar>::apply(...): comm = " << comm << std::endl;
00346   TEUCHOS_TEST_FOR_EXCEPTION(
00347     ( globalDim_ > localSubDim_ ) && comm.get()==NULL, std::logic_error
00348     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00349     );
00350   // ToDo: Write a good general validation function that I can call that will replace
00351   // all of these TEUCHOS_TEST_FOR_EXCEPTION(...) uses
00352 
00353   TEUCHOS_TEST_FOR_EXCEPTION(
00354     real_trans(M_trans)==NOTRANS && !spmdSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
00355     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00356     );
00357   TEUCHOS_TEST_FOR_EXCEPTION(
00358     real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
00359     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00360     );
00361 #endif
00362 
00363   //
00364   // Get explicit (local) views of Y, M and X
00365   //
00366 
00367 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00368   timer.start();
00369 #endif
00370  
00371   DetachedMultiVectorView<Scalar>
00372     Y_local(
00373       *Y,
00374       real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D(),
00375       Range1D()
00376       );
00377   ConstDetachedMultiVectorView<Scalar>
00378     M_local(
00379       *this,
00380       Range1D(localOffset_,localOffset_+localSubDim_-1),
00381       Range1D()
00382       );
00383   ConstDetachedMultiVectorView<Scalar>
00384     X_local(
00385       X
00386       ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
00387       ,Range1D()
00388       );
00389 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00390   timer.stop();
00391   std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
00392 #endif
00393 #ifdef TEUCHOS_DEBUG    
00394   TEUCHOS_TEST_FOR_EXCEPTION(
00395     real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00396     , Exceptions::IncompatibleVectorSpaces
00397     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00398     );
00399   TEUCHOS_TEST_FOR_EXCEPTION(
00400     real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
00401     , Exceptions::IncompatibleVectorSpaces
00402     ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
00403     );
00404 #endif
00405 
00406   //
00407   // If nonlocal (i.e. M_trans==TRANS) then create temporary storage
00408   // for:
00409   //
00410   // Y_local_tmp = alpha * M(local) * X(local) : on nonroot processes
00411   //
00412   // or
00413   //
00414   // Y_local_tmp = beta*Y_local + alpha * M(local) * X(local) : on root process (localOffset_==0)
00415   // 
00416   // and set
00417   //
00418   // localBeta = ( localOffset_ == 0 ? beta : 0.0 )
00419   //
00420   // Above, we choose localBeta such that we will only perform
00421   // Y_local = beta * Y_local + ... on one process (the root
00422   // process where localOffset_==0x). Then, when we add up Y_local
00423   // on all of the processors and we will get the correct result.
00424   //
00425   // If strictly local (i.e. M_trans == NOTRANS) then set:
00426   //
00427   // Y_local_tmp = Y_local
00428   // localBeta = beta
00429   //
00430 
00431 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00432   timer.start();
00433 #endif
00434  
00435   Workspace<Scalar> Y_local_tmp_store(wss, Y_local.subDim()*Y_local.numSubCols(), false);
00436   RTOpPack::SubMultiVectorView<Scalar> Y_local_tmp;
00437   Scalar localBeta;
00438   if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
00439     // Nonlocal
00440     Y_local_tmp.initialize(
00441       0, Y_local.subDim(),
00442       0, Y_local.numSubCols(),
00443       Teuchos::arcpFromArrayView(Y_local_tmp_store()),
00444       Y_local.subDim() // leadingDim == subDim (columns are adjacent)
00445       );
00446     if( localOffset_ == 0 ) {
00447       // Root process: Must copy Y_local into Y_local_tmp
00448       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00449         Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
00450         std::copy( Y_local_j, Y_local_j + Y_local.subDim(), Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
00451       }
00452       localBeta = beta;
00453     }
00454     else {
00455       // Not the root process
00456       localBeta = 0.0;
00457     }
00458   }
00459   else {
00460     // Local
00461     Y_local_tmp = Y_local.smv(); // Shallow copy only!
00462     localBeta = beta;
00463   }
00464 
00465 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00466   timer.stop();
00467   std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
00468 #endif
00469  
00470   //
00471   // Perform the local multiplication:
00472   //
00473   // Y(local) = localBeta * Y(local) + alpha * op(M(local)) * X(local)
00474   //
00475   // or in BLAS lingo:
00476   //
00477   // C = beta * C + alpha * op(A) * op(B)
00478   //
00479 
00480 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00481   timer.start();
00482 #endif
00483   Teuchos::ETransp t_transp;
00484   if(ST::isComplex) {
00485     switch(M_trans) {
00486       case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00487       case TRANS: t_transp = Teuchos::TRANS; break;
00488       case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
00489       default: TEUCHOS_TEST_FOR_EXCEPT(true);
00490     }
00491   }
00492   else {
00493     switch(real_trans(M_trans)) {
00494       case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
00495       case TRANS: t_transp = Teuchos::TRANS; break;
00496       default: TEUCHOS_TEST_FOR_EXCEPT(true);
00497     }
00498   }
00499   if (M_local.numSubCols() > 0) {
00500     // AGS: Added std::max on ld? below, following what is done in
00501     // Epetra_MultiVector Multiply use of GEMM. Allows for 0 length.
00502     blas_.GEMM(
00503       t_transp // TRANSA
00504       ,Teuchos::NO_TRANS // TRANSB
00505       ,Y_local.subDim() // M
00506       ,Y_local.numSubCols() // N
00507       ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim() // K
00508       ,alpha // ALPHA
00509       ,const_cast<Scalar*>(M_local.values()) // A
00510       ,std::max((int) M_local.leadingDim(),1) // LDA
00511       ,const_cast<Scalar*>(X_local.values()) // B
00512       ,std::max((int) X_local.leadingDim(),1) // LDB
00513       ,localBeta // BETA
00514       ,Y_local_tmp.values().get() // C
00515       ,std::max((int) Y_local_tmp.leadingDim(),1) // LDC
00516       );
00517   }
00518   else {
00519     std::fill( Y_local_tmp.values().begin(), Y_local_tmp.values().end(),
00520       ST::zero() );
00521   }
00522 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00523   timer.stop();
00524   std::cout
00525     << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for GEMM = "
00526     << timer.totalElapsedTime() << " seconds\n";
00527 #endif
00528 
00529   if( comm.get() ) {
00530  
00531     //
00532     // Perform the global reduction of Y_local_tmp back into Y_local
00533     //
00534  
00535     if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
00536       // Contiguous buffer for final reduction
00537       Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
00538       // Perform the reduction
00539       Teuchos::reduceAll<Ordinal,Scalar>(
00540         *comm,Teuchos::REDUCE_SUM,Y_local_final_buff.size(),Y_local_tmp.values().get(),
00541         &Y_local_final_buff[0]
00542         );
00543       // Load Y_local_final_buff back into Y_local
00544       const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
00545       for( int j = 0; j < Y_local.numSubCols(); ++j ) {
00546         Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
00547         for( int i = 0; i < Y_local.subDim(); ++i ) {
00548           (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
00549         }
00550       }
00551     }
00552   }
00553   else {
00554 
00555     // When you get here the view Y_local will be committed back to Y
00556     // in the destructor to Y_local
00557 
00558   }
00559 
00560 #ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
00561   timer.stop();
00562   std::cout 
00563     << "\nSpmdMultiVectorBase<Scalar>::apply(...): Total time = "
00564     << timerTotal.totalElapsedTime() << " seconds\n";
00565 #endif
00566 
00567 }
00568 
00569 
00570 // Protected functions for subclasses to call
00571 
00572 
00573 template<class Scalar>
00574 void SpmdMultiVectorBase<Scalar>::updateSpmdSpace()
00575 {
00576   if(globalDim_ == 0) {
00577     const SpmdVectorSpaceBase<Scalar> *l_spmdSpace = this->spmdSpace().get();
00578     if(l_spmdSpace) {
00579       globalDim_ = l_spmdSpace->dim();
00580       localOffset_ = l_spmdSpace->localOffset();
00581       localSubDim_ = l_spmdSpace->localSubDim();
00582       numCols_ = this->domain()->dim();
00583     }
00584     else {
00585       globalDim_ = 0;
00586       localOffset_ = -1;
00587       localSubDim_ = 0;
00588       numCols_ = 0;
00589     }
00590   }
00591 }
00592 
00593 
00594 template<class Scalar>
00595 Range1D SpmdMultiVectorBase<Scalar>::validateRowRange( const Range1D &rowRng_in ) const
00596 {
00597   const Range1D rowRng = Teuchos::full_range(rowRng_in,0,globalDim_-1);
00598 #ifdef TEUCHOS_DEBUG
00599   TEUCHOS_TEST_FOR_EXCEPTION(
00600     !( 0 <= rowRng.lbound() && rowRng.ubound() < globalDim_ ), std::invalid_argument
00601     ,"SpmdMultiVectorBase<Scalar>::validateRowRange(rowRng): Error, the range rowRng = ["
00602     <<rowRng.lbound()<<","<<rowRng.ubound()<<"] is not "
00603     "in the range [0,"<<(globalDim_-1)<<"]!"
00604     );
00605 #endif
00606   return rowRng;
00607 }
00608 
00609 
00610 template<class Scalar>
00611 Range1D SpmdMultiVectorBase<Scalar>::validateColRange( const Range1D &colRng_in ) const
00612 {
00613   const Range1D colRng = Teuchos::full_range(colRng_in,0,numCols_-1);
00614 #ifdef TEUCHOS_DEBUG
00615   TEUCHOS_TEST_FOR_EXCEPTION(
00616     !(0 <= colRng.lbound() && colRng.ubound() < numCols_), std::invalid_argument
00617     ,"SpmdMultiVectorBase<Scalar>::validateColRange(colRng): Error, the range colRng = ["
00618     <<colRng.lbound()<<","<<colRng.ubound()<<"] is not "
00619     "in the range [0,"<<(numCols_-1)<<"]!"
00620     );
00621 #endif
00622   return colRng;
00623 }
00624 
00625 
00626 } // end namespace Thyra
00627 
00628 
00629 #endif // THYRA_SPMD_MULTI_VECTOR_BASE_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines