Tpetra Matrix/Vector Services Version of the Day
Tpetra_MultiVector_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
00043 #define TPETRA_MULTIVECTOR_DEF_HPP
00044 
00045 #include <Kokkos_DefaultArithmetic.hpp>
00046 #include <Kokkos_NodeTrace.hpp>
00047 #include <Teuchos_Assert.hpp>
00048 #include <Teuchos_as.hpp>
00049 #include <Tpetra_Util.hpp>
00050 #include <Tpetra_Vector.hpp>
00051 
00052 #if TPETRA_USE_KOKKOS_DISTOBJECT
00053 #include <Tpetra_Details_MultiVectorDistObjectKernels.hpp>
00054 #endif
00055 
00056 #ifdef DOXYGEN_USE_ONLY
00057   #include "Tpetra_MultiVector_decl.hpp"
00058 #endif
00059 
00060 namespace Tpetra {
00061 
00062   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00063   bool
00064   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00065   vectorIndexOutOfRange(size_t VectorIndex) const {
00066     return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
00067   }
00068 
00069   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00070   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00071   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00072                size_t NumVectors,
00073                bool zeroOut) : /* default is true */
00074     DO (map),
00075     lclMV_ (map->getNode ())
00076   {
00077     using Teuchos::ArrayRCP;
00078     using Teuchos::RCP;
00079 
00080     TEUCHOS_TEST_FOR_EXCEPTION(NumVectors < 1, std::invalid_argument,
00081       "Tpetra::MultiVector::MultiVector(): NumVectors must be strictly positive.");
00082     const size_t myLen = getLocalLength();
00083     if (myLen > 0) {
00084       RCP<Node> node = map->getNode();
00085       // On host-type Kokkos Nodes, allocBuffer() just calls the
00086       // one-argument version of arcp to allocate memory.  This should
00087       // not fill the memory by default, otherwise we would lose the
00088       // first-touch allocation optimization.
00089       ArrayRCP<Scalar> data = node->template allocBuffer<Scalar>(myLen*NumVectors);
00090       MVT::initializeValues(lclMV_,myLen,NumVectors,data,myLen);
00091       if (zeroOut) {
00092         // MVT uses the Kokkos Node's parallel_for in this case, for
00093         // first-touch allocation (across rows).
00094         MVT::Init(lclMV_, Teuchos::ScalarTraits<Scalar>::zero());
00095       }
00096     }
00097     else {
00098       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00099     }
00100   }
00101 
00102   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00103   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00104   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source) :
00105     DO (source),
00106     lclMV_ (MVT::getNode (source.lclMV_))
00107   {
00108     using Teuchos::ArrayRCP;
00109     using Teuchos::RCP;
00110 
00111     // copy data from the source MultiVector into this multivector
00112     RCP<Node> node = MVT::getNode(source.lclMV_);
00113     const LocalOrdinal myLen = source.getLocalLength();
00114     const size_t numVecs = source.getNumVectors();
00115 
00116     // On host-type Kokkos Nodes, allocBuffer() just calls the
00117     // one-argument version of arcp to allocate memory.  This should
00118     // not fill the memory by default, otherwise we would lose the
00119     // first-touch allocation optimization.
00120     ArrayRCP<Scalar> data = (myLen > 0) ?
00121       node->template allocBuffer<Scalar> (myLen * numVecs) :
00122       Teuchos::null;
00123     const size_t stride = (myLen > 0) ? myLen : size_t (0);
00124 
00125     // This just sets the dimensions, pointer, and stride of lclMV_.
00126     MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00127     // This actually copies the data.  It uses the Node's
00128     // parallel_for to copy, which should ensure first-touch
00129     // allocation on systems that support it.
00130     if (source.isConstantStride ()) {
00131       MVT::Assign (lclMV_, source.lclMV_);
00132     }
00133     else {
00134       MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00135     }
00136   }
00137 
00138   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00139   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00140   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00141                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00142                EPrivateComputeViewConstructor /* dummy */) :
00143     DO (map),
00144     lclMV_ (localMultiVector)
00145   {
00146     const size_t localNumElts = map->getNodeNumElements ();
00147     TEUCHOS_TEST_FOR_EXCEPTION(
00148       localMultiVector.getNumRows () < localNumElts,
00149       std::invalid_argument,
00150       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00151       << " on process " << map->getComm ()->getRank () << ", but the given "
00152       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00153       << " rows.");
00154   }
00155 
00156   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00157   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00158   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00159                const Teuchos::ArrayRCP<Scalar>& view,
00160                size_t LDA,
00161                size_t NumVectors,
00162                EPrivateHostViewConstructor /* dummy */) :
00163     DO (map),
00164     lclMV_ (map->getNode ())
00165   {
00166     using Teuchos::as;
00167     using Teuchos::ArrayRCP;
00168     using Teuchos::RCP;
00169 
00170     const char tfecfFuncName[] = "MultiVector(view,LDA,NumVector)";
00171     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00172       ": NumVectors must be strictly positive, but you specified NumVectors = "
00173       << NumVectors << ".");
00174     const size_t myLen = getLocalLength();
00175     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument,
00176       ": LDA must be large enough to accomodate the local entries.");
00177     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00178       as<size_t> (view.size ()) < LDA*(NumVectors - 1) + myLen,
00179       std::invalid_argument,
00180       ": view does not contain enough data to specify the entries in this.");
00181     MVT::initializeValues (lclMV_, myLen, NumVectors, view, myLen);
00182   }
00183 
00184   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00185   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00186   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00187                Teuchos::ArrayRCP<Scalar> data,
00188                size_t LDA,
00189                size_t NumVectors,
00190                EPrivateComputeViewConstructor /* dummy */) :
00191     DO (map),
00192     lclMV_ (map->getNode ())
00193   {
00194     using Teuchos::as;
00195     using Teuchos::ArrayRCP;
00196     using Teuchos::RCP;
00197 
00198     const char tfecfFuncName[] = "MultiVector(data,LDA,NumVector)";
00199     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00200       ": NumVectors must be strictly positive.");
00201     const size_t myLen = getLocalLength();
00202 #ifdef HAVE_TPETRA_DEBUG
00203     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00204       as<size_t> (data.size ()) < LDA*(NumVectors - 1) + myLen,
00205       std::runtime_error,
00206       ": data does not contain enough data to specify the entries in this.");
00207 #endif
00208     MVT::initializeValues(lclMV_,myLen,NumVectors,data,LDA);
00209   }
00210 
00211   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00212   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00213   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00214                Teuchos::ArrayRCP<Scalar> data,
00215                size_t LDA,
00216                Teuchos::ArrayView<const size_t> WhichVectors,
00217                EPrivateComputeViewConstructor /* dummy */) :
00218     DO (map),
00219     lclMV_ (map->getNode ()),
00220     whichVectors_ (WhichVectors)
00221   {
00222     using Teuchos::as;
00223     using Teuchos::ArrayRCP;
00224     using Teuchos::RCP;
00225 
00226     const char tfecfFuncName[] = "MultiVector(data,LDA,WhichVectors)";
00227     const size_t myLen = getLocalLength();
00228     size_t maxVector = *std::max_element(WhichVectors.begin(), WhichVectors.end());
00229     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00230       ": LDA must be large enough to accomodate the local entries.");
00231 #ifdef HAVE_TPETRA_DEBUG
00232     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00233       as<size_t> (data.size ()) < LDA * maxVector + myLen, std::runtime_error,
00234       ": data does not contain enough data to specify the entries in this.");
00235 #endif
00236     if (WhichVectors.size () == 1) {
00237       // shift data so that desired vector is vector 0
00238       maxVector = 0;
00239       data += LDA * WhichVectors[0];
00240       // kill whichVectors_; we are constant stride
00241       whichVectors_.clear ();
00242     }
00243     if (myLen > 0) {
00244       MVT::initializeValues(lclMV_,myLen,maxVector+1,data,LDA);
00245     }
00246     else {
00247       MVT::initializeValues(lclMV_,0,WhichVectors.size(),Teuchos::null,0);
00248     }
00249   }
00250 
00251   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00252   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00253   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00254                const Teuchos::ArrayRCP<Scalar>& data,
00255                const size_t LDA,
00256                const size_t numVecs) :
00257     DO (map),
00258     lclMV_ (map->getNode ())
00259   {
00260     const char tfecfFuncName[] = "MultiVector(map,data,LDA,numVecs)";
00261     const size_t numRows = this->getLocalLength ();
00262     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < numRows, std::runtime_error,
00263       ": LDA = " << LDA << " < numRows = " << numRows << ".");
00264     MVT::initializeValues (lclMV_, numRows, numVecs, data, LDA);
00265   }
00266 
00267   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00268   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00269   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00270                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00271                Teuchos::ArrayView<const size_t> WhichVectors,
00272                EPrivateComputeViewConstructor /* dummy */) :
00273     DO (map),
00274     lclMV_ (localMultiVector),
00275     whichVectors_ (WhichVectors)
00276   {
00277     const size_t localNumElts = map->getNodeNumElements ();
00278     TEUCHOS_TEST_FOR_EXCEPTION(
00279       localMultiVector.getNumRows () < localNumElts,
00280       std::invalid_argument,
00281       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00282       << " on process " << map->getComm ()->getRank () << ", but the given "
00283       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00284       << " rows.");
00285     TEUCHOS_TEST_FOR_EXCEPTION(
00286       WhichVectors.size () == 0,
00287       std::invalid_argument,
00288       "Tpetra::MultiVector constructor: WhichVectors.size() == 0.  "
00289       "The noncontiguous column view constructor only makes sense for a "
00290       "nonzero number of columns to view.");
00291 
00292     // Optimization for the case of a single column: just make a
00293     // contiguous ("constant stride") view of the one column.
00294     if (WhichVectors.size () == 1) {
00295       const size_t offsetRow = 0;
00296       const size_t offsetCol = WhichVectors[0];
00297       lclMV_ = lclMV_.offsetViewNonConst (localNumElts, 1, offsetRow, offsetCol);
00298       whichVectors_.clear (); // This being empty defines "constant stride."
00299     }
00300   }
00301 
00302 
00303   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00304   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00305   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00306                const Teuchos::ArrayView<const Scalar>& A,
00307                size_t LDA,
00308                size_t NumVectors) :
00309     DO (map),
00310     lclMV_ (map->getNode ())
00311   {
00312     using Teuchos::as;
00313     using Teuchos::ArrayRCP;
00314     using Teuchos::ArrayView;
00315     using Teuchos::RCP;
00316 
00317     const char tfecfFuncName[] = "MultiVector(A,LDA)";
00318     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00319       ": NumVectors must be strictly positive.");
00320     const size_t myLen = getLocalLength();
00321     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00322       ": LDA must be large enough to accomodate the local entries.");
00323 #ifdef HAVE_TPETRA_DEBUG
00324     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00325       as<size_t> (A.size ()) < LDA*(NumVectors - 1) + myLen,
00326       std::runtime_error,
00327       ": A does not contain enough data to specify the entries in this.");
00328 #endif
00329     if (myLen > 0) {
00330       RCP<Node> node = MVT::getNode(lclMV_);
00331       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00332       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00333       // FIXME (mfh 13 Sep 2012) It would be better to use the Kokkos
00334       // Node's copyToBuffer method to push data directly to the
00335       // device pointer, rather than using an intermediate host
00336       // buffer.  Also, we should have an optimization for the
00337       // contiguous storage case (constant stride, and the stride
00338       // equals the local number of rows).
00339       KOKKOS_NODE_TRACE("MultiVector::MultiVector(1D)")
00340       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00341       typename ArrayView<const Scalar>::iterator srcit = A.begin();
00342       for (size_t j = 0; j < NumVectors; ++j) {
00343         std::copy(srcit,srcit+myLen,myview);
00344         srcit += LDA;
00345         myview += myLen;
00346       }
00347       mydata = Teuchos::null;
00348       myview = Teuchos::null;
00349     }
00350     else {
00351       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00352     }
00353   }
00354 
00355   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00356   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00357   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00358                const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs,
00359                size_t NumVectors) :
00360     DO (map),
00361     lclMV_ (map->getNode ())
00362   {
00363     using Teuchos::as;
00364     using Teuchos::ArrayRCP;
00365     using Teuchos::ArrayView;
00366     using Teuchos::RCP;
00367 
00368     const char tfecfFuncName[] = "MultiVector(ArrayOfPtrs)";
00369     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00370       NumVectors < 1 || NumVectors != as<size_t>(ArrayOfPtrs.size()),
00371       std::runtime_error,
00372       ": ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs.");
00373     const size_t myLen = getLocalLength();
00374     if (myLen > 0) {
00375       RCP<Node> node = MVT::getNode(lclMV_);
00376       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00377       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00378       KOKKOS_NODE_TRACE("MultiVector::MultiVector(2D)")
00379       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00380       for (size_t j = 0; j < NumVectors; ++j) {
00381 #ifdef HAVE_TPETRA_DEBUG
00382         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00383           as<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(),
00384           std::runtime_error,
00385           ": ArrayOfPtrs[" << j << "].size() (== " << ArrayOfPtrs[j].size()
00386           << ") is not equal to getLocalLength() (== " << getLocalLength());
00387 #endif
00388         typename ArrayView<const Scalar>::iterator src = ArrayOfPtrs[j].begin();
00389         std::copy(src,src+myLen,myview);
00390         myview += myLen;
00391       }
00392       myview = Teuchos::null;
00393       mydata = Teuchos::null;
00394     }
00395     else {
00396       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00397     }
00398   }
00399 
00400   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00401   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~MultiVector() {
00402   }
00403 
00404 
00405   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00406   bool MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isConstantStride() const {
00407     return whichVectors_.empty();
00408   }
00409 
00410 
00411   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00412   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalLength() const {
00413     return this->getMap()->getNodeNumElements();
00414   }
00415 
00416 
00417   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00418   global_size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalLength() const {
00419     return this->getMap()->getGlobalNumElements();
00420   }
00421 
00422 
00423   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00424   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getStride() const {
00425     if (isConstantStride()) {
00426       return MVT::getStride(lclMV_);
00427     }
00428     return 0;
00429   }
00430 
00431 
00432   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00433   bool
00434   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00435   checkSizes (const SrcDistObject& sourceObj)
00436   {
00437     // Check whether the source object is a MultiVector.  If not, then
00438     // we can't even compare sizes, so it's definitely not OK to
00439     // Import or Export from it.
00440     typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type;
00441     const this_type* src = dynamic_cast<const this_type*> (&sourceObj);
00442     if (src == NULL) {
00443       return false;
00444     } else {
00445       // The target of the Import or Export calls checkSizes() in
00446       // DistObject::doTransfer().  By that point, we've already
00447       // constructed an Import or Export object using the two
00448       // multivectors' Maps, which means that (hopefully) we've
00449       // already checked other attributes of the multivectors.  Thus,
00450       // all we need to do here is check the number of columns.
00451       return src->getNumVectors () == this->getNumVectors ();
00452     }
00453   }
00454 
00455   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00456   size_t
00457   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00458   constantNumberOfPackets () const {
00459     return this->getNumVectors ();
00460   }
00461 
00462 #if TPETRA_USE_KOKKOS_DISTOBJECT
00463 
00464   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00465   void
00466   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00467   copyAndPermute (
00468     const SrcDistObject& sourceObj,
00469     size_t numSameIDs,
00470     const Kokkos::View<const LocalOrdinal*, device_type> &permuteToLIDs,
00471     const Kokkos::View<const LocalOrdinal*, device_type> &permuteFromLIDs)
00472   {
00473     using Teuchos::ArrayRCP;
00474     using Teuchos::ArrayView;
00475     using Teuchos::RCP;
00476     using Kokkos::Compat::getKokkosViewDeepCopy;
00477     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00478     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00479     const char tfecfFuncName[] = "copyAndPermute";
00480 
00481     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00482       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00483       ": permuteToLIDs and permuteFromLIDs must have the same size."
00484       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00485       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00486 
00487     // We've already called checkSizes(), so this cast must succeed.
00488     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00489 
00490     const size_t numCols = this->getNumVectors ();
00491     const size_t stride = MVT::getStride (lclMV_);
00492 
00493     // Copy rows [0, numSameIDs-1] of the local multivectors.
00494     //
00495     // For GPU Nodes: All of this happens using device pointers; this
00496     // does not require host views of either source or destination.
00497     if (isConstantStride ()) {
00498       const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00499       const size_t numDestCols = MVT::getNumCols (lclMV_);
00500 
00501       const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00502       KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00503       if (sourceMV.isConstantStride ()) {
00504         MVT::Assign (dest, src);
00505       }
00506       else {
00507         MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00508       }
00509     }
00510     else {
00511       // Copy the columns one at a time, since MVT doesn't have an
00512       // Assign method for noncontiguous access to the destination
00513       // multivector.
00514       for (size_t j = 0; j < numCols; ++j) {
00515         const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00516         const size_t srcCol = sourceMV.isConstantStride () ?
00517           j : sourceMV.whichVectors_[j];
00518         KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00519         const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00520         MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00521       }
00522     }
00523 
00524     // For the remaining GIDs, execute the permutations.  This may
00525     // involve noncontiguous access of both source and destination
00526     // vectors, depending on the LID lists.
00527     //
00528     // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
00529     // the same process, this merges their values by replacement of
00530     // the last encountered GID, not by the specified merge rule
00531     // (such as ADD).
00532 
00533     // If there are no permutations, we are done
00534     if (permuteFromLIDs.size() == 0 || permuteToLIDs.size() == 0)
00535       return;
00536 
00537     if (this->isConstantStride ()) {
00538       Details::PermuteArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> perm;
00539       perm.permuteToLIDs = permuteToLIDs;
00540       perm.permuteFromLIDs = permuteFromLIDs;
00541       perm.src = sourceMV.getKokkosView();
00542       perm.dest = getKokkosViewNonConst();
00543       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00544       perm.dest_stride = stride;
00545       perm.numCols = numCols;
00546 
00547       perm.permute();
00548     }
00549     else {
00550       Details::PermuteArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> perm;
00551       perm.permuteToLIDs = permuteToLIDs;
00552       perm.permuteFromLIDs = permuteFromLIDs;
00553       perm.src = sourceMV.getKokkosView();
00554       perm.dest = getKokkosViewNonConst();
00555       perm.src_whichVectors =
00556         getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00557       perm.dest_whichVectors =
00558         getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00559       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00560       perm.dest_stride = stride;
00561       perm.numCols = numCols;
00562 
00563       perm.permute();
00564     }
00565   }
00566 
00567   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00568   void
00569   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00570   packAndPrepare (
00571     const SrcDistObject& sourceObj,
00572     const Kokkos::View<const LocalOrdinal*, device_type> &exportLIDs,
00573     Kokkos::View<Scalar*, device_type> &exports,
00574     const Kokkos::View<size_t*, device_type> &numExportPacketsPerLID,
00575     size_t& constantNumPackets,
00576     Distributor & /* distor */ )
00577   {
00578     using Teuchos::Array;
00579     using Teuchos::ArrayView;
00580     using Teuchos::as;
00581     using Kokkos::Compat::getKokkosViewDeepCopy;
00582     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00583     typedef Array<size_t>::size_type size_type;
00584 
00585     // If we have no exports, there is nothing to do
00586     if (exportLIDs.size() == 0)
00587       return;
00588 
00589     // We've already called checkSizes(), so this cast must succeed.
00590     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00591 
00592     // We don't need numExportPacketsPerLID; forestall "unused
00593     // variable" compile warnings.
00594     (void) numExportPacketsPerLID;
00595 
00596     /* The layout in the export for MultiVectors is as follows:
00597        exports = { all of the data from row exportLIDs.front() ;
00598                    ....
00599                    all of the data from row exportLIDs.back() }
00600       This doesn't have the best locality, but is necessary because
00601       the data for a Packet (all data associated with an LID) is
00602       required to be contiguous. */
00603 
00604     const size_t numCols = sourceMV.getNumVectors ();
00605     const size_t stride = MVT::getStride (sourceMV.lclMV_);
00606     // This spares us from needing to fill numExportPacketsPerLID.
00607     // Setting constantNumPackets to a nonzero value signals that
00608     // all packets have the same number of entries.
00609     constantNumPackets = numCols;
00610 
00611     const size_t numExportLIDs = exportLIDs.size ();
00612     const size_t newExportsSize = numCols * numExportLIDs;
00613     if (exports.size () != newExportsSize) {
00614       Kokkos::Compat::realloc(exports, newExportsSize);
00615     }
00616 
00617     if (numCols == 1) { // special case for one column only
00618       // MultiVector always represents a single column with constant
00619       // stride, but it doesn't hurt to implement both cases anyway.
00620       if (sourceMV.isConstantStride ()) {
00621         Details::PackArraySingleColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00622         pack.exportLIDs = exportLIDs;
00623         pack.exports = exports;
00624         pack.src = sourceMV.getKokkosView();
00625         pack.pack();
00626       }
00627       else {
00628         Details::PackArraySingleColumnOffset<Scalar,LocalOrdinal,device_type> pack;
00629         pack.exportLIDs = exportLIDs;
00630         pack.exports = exports;
00631         pack.src = sourceMV.getKokkosView();
00632         pack.offset = sourceMV.whichVectors_[0] * stride;
00633         pack.pack();
00634       }
00635     }
00636     else { // the source MultiVector has multiple columns
00637       if (sourceMV.isConstantStride ()) {
00638         Details::PackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00639         pack.exportLIDs = exportLIDs;
00640         pack.exports = exports;
00641         pack.src = sourceMV.getKokkosView();
00642         pack.stride = stride;
00643         pack.numCols = numCols;
00644         pack.pack ();
00645       }
00646       else {
00647         Details::PackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> pack;
00648         pack.exportLIDs = exportLIDs;
00649         pack.exports = exports;
00650         pack.src = sourceMV.getKokkosView();
00651         pack.srcWhichVectors =
00652           getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00653         pack.stride = stride;
00654         pack.numCols = numCols;
00655         pack.pack ();
00656       }
00657     }
00658   }
00659 
00660 
00661   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00662   void
00663   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00664   unpackAndCombine (
00665     const Kokkos::View<const LocalOrdinal*, device_type> &importLIDs,
00666     const Kokkos::View<const Scalar*, device_type> &imports,
00667     const Kokkos::View<size_t*, device_type> &numPacketsPerLID,
00668     size_t constantNumPackets,
00669     Distributor & /* distor */,
00670     CombineMode CM)
00671   {
00672     using Teuchos::ArrayView;
00673     using Teuchos::as;
00674     using Kokkos::Compat::getKokkosViewDeepCopy;
00675     typedef Teuchos::ScalarTraits<Scalar> SCT;
00676     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00677     const char tfecfFuncName[] = "unpackAndCombine";
00678 
00679     // If we have no imports, there is nothing to do
00680     if (importLIDs.size() == 0)
00681       return;
00682 
00683     // We don't need numPacketsPerLID; forestall "unused variable"
00684     // compile warnings.
00685     (void) numPacketsPerLID;
00686 
00687     /* The layout in the export for MultiVectors is as follows:
00688        imports = { all of the data from row exportLIDs.front() ;
00689                    ....
00690                    all of the data from row exportLIDs.back() }
00691       This doesn't have the best locality, but is necessary because
00692       the data for a Packet (all data associated with an LID) is
00693       required to be contiguous. */
00694 
00695     const size_t numVecs = getNumVectors ();
00696 
00697 #ifdef HAVE_TPETRA_DEBUG
00698     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00699       as<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
00700       std::runtime_error,
00701       ": 'imports' buffer size must be consistent with the amount of data to "
00702       "be sent.  " << std::endl << "imports.size() = " << imports.size()
00703       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
00704       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
00705       << ".");
00706 
00707     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00708       as<size_t> (constantNumPackets) == as<size_t> (0), std::runtime_error,
00709       ": constantNumPackets input argument must be nonzero.");
00710 
00711     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00712       as<size_t> (numVecs) != as<size_t> (constantNumPackets),
00713       std::runtime_error, ": constantNumPackets must equal numVecs.");
00714 #endif // HAVE_TPETRA_DEBUG
00715 
00716     if (numVecs > 0 && importLIDs.size () > 0) {
00717       const size_t myStride = MVT::getStride (lclMV_);
00718 
00719       // NOTE (mfh 10 Mar 2012) If you want to implement custom
00720       // combine modes, start editing here.  Also, if you trust
00721       // inlining, it would be nice to condense this code by using a
00722       // binary function object f:
00723       //
00724       // ncview_[...] = f (ncview_[...], *impptr++);
00725       if (CM == INSERT || CM == REPLACE) {
00726         if (isConstantStride()) {
00727           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00728           unpack.importLIDs = importLIDs;
00729           unpack.imports = imports;
00730           unpack.dest = getKokkosViewNonConst();
00731           unpack.stride = myStride;
00732           unpack.numCols = numVecs;
00733           unpack.op = Details::InsertOp();
00734 
00735           unpack.unpack();
00736         }
00737         else {
00738           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00739           unpack.importLIDs = importLIDs;
00740           unpack.imports = imports;
00741           unpack.dest = getKokkosViewNonConst();
00742           unpack.whichVectors =
00743             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00744           unpack.stride = myStride;
00745           unpack.numCols = numVecs;
00746           unpack.op = Details::InsertOp();
00747 
00748           unpack.unpack();
00749         }
00750       }
00751       else if (CM == ADD) {
00752         if (isConstantStride()) {
00753           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00754           unpack.importLIDs = importLIDs;
00755           unpack.imports = imports;
00756           unpack.dest = getKokkosViewNonConst();
00757           unpack.stride = myStride;
00758           unpack.numCols = numVecs;
00759           unpack.op = Details::AddOp();
00760 
00761           unpack.unpack();
00762         }
00763         else {
00764           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00765           unpack.importLIDs = importLIDs;
00766           unpack.imports = imports;
00767           unpack.dest = getKokkosViewNonConst();
00768           unpack.whichVectors =
00769             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00770           unpack.stride = myStride;
00771           unpack.numCols = numVecs;
00772           unpack.op = Details::AddOp();
00773 
00774           unpack.unpack();
00775         }
00776       }
00777       else if (CM == ABSMAX) {
00778         if (isConstantStride()) {
00779           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00780           unpack.importLIDs = importLIDs;
00781           unpack.imports = imports;
00782           unpack.dest = getKokkosViewNonConst();
00783           unpack.stride = myStride;
00784           unpack.numCols = numVecs;
00785           unpack.op = Details::AbsMaxOp();
00786 
00787           unpack.unpack();
00788         }
00789         else {
00790           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00791           unpack.importLIDs = importLIDs;
00792           unpack.imports = imports;
00793           unpack.dest = getKokkosViewNonConst();
00794           unpack.whichVectors =
00795             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00796           unpack.stride = myStride;
00797           unpack.numCols = numVecs;
00798           unpack.op = Details::AbsMaxOp();
00799 
00800           unpack.unpack();
00801         }
00802       }
00803       else {
00804         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00805           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
00806           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
00807           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
00808       }
00809     }
00810   }
00811 
00812 #else
00813 
00814   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00815   void
00816   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00817   copyAndPermute (const SrcDistObject& sourceObj,
00818                   size_t numSameIDs,
00819                   const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
00820                   const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
00821   {
00822     using Teuchos::ArrayRCP;
00823     using Teuchos::ArrayView;
00824     using Teuchos::RCP;
00825     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00826     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00827     const char tfecfFuncName[] = "copyAndPermute";
00828 
00829     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00830       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00831       ": permuteToLIDs and permuteFromLIDs must have the same size."
00832       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00833       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00834 
00835     // We've already called checkSizes(), so this cast must succeed.
00836     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00837 
00838     const size_t numCols = this->getNumVectors ();
00839 
00840     // Copy rows [0, numSameIDs-1] of the local multivectors.
00841     //
00842     // For GPU Nodes: All of this happens using device pointers; this
00843     // does not require host views of either source or destination.
00844     //
00845     // mfh 10 Jan 2013: In the current implementation, device data has
00846     // already been copied to the host views.  If we do a
00847     // device-device copy here, it won't synch the host views
00848     // correctly.  Thus, we replace this step with a host copy, if
00849     // Node is a GPU Node.  That ensures correctness for now, though
00850     // it's really something we need to fix if we plan to run
00851     // efficiently on GPUs.
00852     if (Node::isHostNode) {
00853       if (isConstantStride ()) {
00854         const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00855         const size_t numDestCols = MVT::getNumCols (lclMV_);
00856 
00857         const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00858         KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00859         if (sourceMV.isConstantStride ()) {
00860           MVT::Assign (dest, src);
00861         }
00862         else {
00863           MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00864         }
00865       }
00866       else {
00867         // Copy the columns one at a time, since MVT doesn't have an
00868         // Assign method for noncontiguous access to the destination
00869         // multivector.
00870         for (size_t j = 0; j < numCols; ++j) {
00871           const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00872           const size_t srcCol = sourceMV.isConstantStride () ?
00873             j : sourceMV.whichVectors_[j];
00874           KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00875           const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00876           MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00877         }
00878       }
00879     }
00880 
00881     // Copy the vectors one at a time.  This works whether or not the
00882     // multivectors have constant stride.
00883     for (size_t j = 0; j < numCols; ++j) {
00884       // Get host views of the current source and destination column.
00885       //
00886       // FIXME (mfh 10 Mar 2012) Copying should really be done on the
00887       // device.  There's no need to bring everything back to the
00888       // host, copy there, and then copy back.
00889       ArrayView<const Scalar> srcView =
00890         sourceMV.getSubArrayRCP (sourceMV.cview_, j) ();
00891       ArrayView<Scalar> dstView = getSubArrayRCP (ncview_, j) ();
00892 
00893       if (! Node::isHostNode) {
00894         // The first numSameIDs IDs are the same between source and
00895         // target, so we can just copy the data.  (This favors faster
00896         // contiguous access whenever we can do it.)
00897         std::copy (srcView.begin (), srcView.begin () + numSameIDs,
00898                    dstView.begin ());
00899       }
00900 
00901       // For the remaining GIDs, execute the permutations.  This may
00902       // involve noncontiguous access of both source and destination
00903       // vectors, depending on the LID lists.
00904       //
00905       // FIXME (mfh 09 Apr 2012) Permutation should be done on the
00906       // device, not on the host.
00907       //
00908       // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
00909       // the same process, this merges their values by replacement of
00910       // the last encountered GID, not by the specified merge rule
00911       // (such as ADD).
00912       const size_type numPermuteLIDs =
00913         std::min (permuteToLIDs.size (), permuteFromLIDs.size ());
00914       for (size_type k = 0; k < numPermuteLIDs; ++k) {
00915         dstView[permuteToLIDs[k]] = srcView[permuteFromLIDs[k]];
00916       }
00917     }
00918   }
00919 
00920   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00921   void
00922   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00923   packAndPrepare (const SrcDistObject& sourceObj,
00924                   const ArrayView<const LocalOrdinal> &exportLIDs,
00925                   Array<Scalar> &exports,
00926                   const ArrayView<size_t> &numExportPacketsPerLID,
00927                   size_t& constantNumPackets,
00928                   Distributor & /* distor */ )
00929   {
00930     using Teuchos::Array;
00931     using Teuchos::ArrayView;
00932     using Teuchos::as;
00933     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00934     typedef Array<size_t>::size_type size_type;
00935 
00936     // We've already called checkSizes(), so this cast must succeed.
00937     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00938 
00939     // We don't need numExportPacketsPerLID; forestall "unused
00940     // variable" compile warnings.
00941     (void) numExportPacketsPerLID;
00942 
00943     /* The layout in the export for MultiVectors is as follows:
00944        exports = { all of the data from row exportLIDs.front() ;
00945                    ....
00946                    all of the data from row exportLIDs.back() }
00947       This doesn't have the best locality, but is necessary because
00948       the data for a Packet (all data associated with an LID) is
00949       required to be contiguous. */
00950 
00951     const size_t numCols = sourceMV.getNumVectors ();
00952     const size_t stride = MVT::getStride (sourceMV.lclMV_);
00953     // This spares us from needing to fill numExportPacketsPerLID.
00954     // Setting constantNumPackets to a nonzero value signals that
00955     // all packets have the same number of entries.
00956     constantNumPackets = numCols;
00957 
00958     const size_type numExportLIDs = exportLIDs.size ();
00959     const size_type newExportsSize = numCols * numExportLIDs;
00960     if (exports.size () != newExportsSize) {
00961       exports.resize (newExportsSize);
00962     }
00963 
00964     ArrayView<const Scalar> srcView = sourceMV.cview_ ();
00965     if (numCols == 1) { // special case for one column only
00966       // MultiVector always represents a single column with constant
00967       // stride, but it doesn't hurt to implement both cases anyway.
00968       if (sourceMV.isConstantStride ()) {
00969         for (size_type k = 0; k < numExportLIDs; ++k) {
00970           exports[k] = srcView[exportLIDs[k]];
00971         }
00972       }
00973       else {
00974         const size_t offset = sourceMV.whichVectors_[0] * stride;
00975         for (size_type k = 0; k < numExportLIDs; ++k) {
00976           exports[k] = srcView[exportLIDs[k] + offset];
00977         }
00978       }
00979     }
00980     else { // the source MultiVector has multiple columns
00981       typename Array<Scalar>::iterator expptr = exports.begin ();
00982       if (sourceMV.isConstantStride ()) {
00983         for (size_type k = 0; k < numExportLIDs; ++k) {
00984           const size_t localRow = as<size_t> (exportLIDs[k]);
00985           for (size_t j = 0; j < numCols; ++j) {
00986             *expptr++ = srcView[localRow + j*stride];
00987           }
00988         }
00989       }
00990       else {
00991         ArrayView<const size_t> srcWhichVectors = sourceMV.whichVectors_ ();
00992         for (size_type k = 0; k < numExportLIDs; ++k) {
00993           const size_t localRow = as<size_t> (exportLIDs[k]);
00994           for (size_t j = 0; j < numCols; ++j) {
00995             *expptr++ = srcView[localRow + srcWhichVectors[j]*stride];
00996           }
00997         }
00998       }
00999     }
01000   }
01001 
01002 
01003   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01004   void
01005   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01006   unpackAndCombine (const ArrayView<const LocalOrdinal> &importLIDs,
01007                     const ArrayView<const Scalar> &imports,
01008                     const ArrayView<size_t> &numPacketsPerLID,
01009                     size_t constantNumPackets,
01010                     Distributor & /* distor */,
01011                     CombineMode CM)
01012   {
01013     using Teuchos::ArrayView;
01014     using Teuchos::as;
01015     typedef Teuchos::ScalarTraits<Scalar> SCT;
01016     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
01017     const char tfecfFuncName[] = "unpackAndCombine";
01018 
01019     // We don't need numPacketsPerLID; forestall "unused variable"
01020     // compile warnings.
01021     (void) numPacketsPerLID;
01022 
01023     /* The layout in the export for MultiVectors is as follows:
01024        imports = { all of the data from row exportLIDs.front() ;
01025                    ....
01026                    all of the data from row exportLIDs.back() }
01027       This doesn't have the best locality, but is necessary because
01028       the data for a Packet (all data associated with an LID) is
01029       required to be contiguous. */
01030 
01031     const size_t numVecs = getNumVectors ();
01032 
01033 #ifdef HAVE_TPETRA_DEBUG
01034     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01035       as<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
01036       std::runtime_error,
01037       ": 'imports' buffer size must be consistent with the amount of data to "
01038       "be sent.  " << std::endl << "imports.size() = " << imports.size()
01039       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
01040       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
01041       << ".");
01042 
01043     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01044       as<size_t> (constantNumPackets) == as<size_t> (0), std::runtime_error,
01045       ": constantNumPackets input argument must be nonzero.");
01046 
01047     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01048       as<size_t> (numVecs) != as<size_t> (constantNumPackets),
01049       std::runtime_error, ": constantNumPackets must equal numVecs.");
01050 #endif // HAVE_TPETRA_DEBUG
01051 
01052     if (numVecs > 0 && importLIDs.size () > 0) {
01053       typename ArrayView<const Scalar>::iterator impptr = imports.begin ();
01054       ArrayView<Scalar> destView = ncview_ ();
01055       const size_type numImportLIDs = importLIDs.size ();
01056       const size_t myStride = MVT::getStride (lclMV_);
01057 
01058       // NOTE (mfh 10 Mar 2012) If you want to implement custom
01059       // combine modes, start editing here.  Also, if you trust
01060       // inlining, it would be nice to condense this code by using a
01061       // binary function object f:
01062       //
01063       // ncview_[...] = f (ncview_[...], *impptr++);
01064       if (CM == INSERT || CM == REPLACE) {
01065         if (isConstantStride()) {
01066           for (size_type k = 0; k < numImportLIDs; ++k) {
01067             const size_t localRow = as<size_t> (importLIDs[k]);
01068             for (size_t j = 0; j < numVecs; ++j) {
01069               destView[localRow + myStride*j] = *impptr++;
01070             }
01071           }
01072         }
01073         else {
01074           for (size_type k = 0; k < numImportLIDs; ++k) {
01075             const size_t localRow = as<size_t> (importLIDs[k]);
01076             for (size_t j = 0; j < numVecs; ++j) {
01077               destView[localRow + myStride*whichVectors_[j]] = *impptr++;
01078             }
01079           }
01080         }
01081       }
01082       else if (CM == ADD) {
01083         if (isConstantStride()) {
01084           for (size_type k = 0; k < numImportLIDs; ++k) {
01085             const size_t localRow = as<size_t> (importLIDs[k]);
01086             for (size_t j = 0; j < numVecs; ++j) {
01087               destView[localRow + myStride*j] += *impptr++;
01088             }
01089           }
01090         }
01091         else {
01092           for (size_type k = 0; k < numImportLIDs; ++k) {
01093             const size_t localRow = as<size_t> (importLIDs[k]);
01094             for (size_t j = 0; j < numVecs; ++j) {
01095               destView[localRow + myStride*whichVectors_[j]] += *impptr++;
01096             }
01097           }
01098         }
01099       }
01100       else if (CM == ABSMAX) {
01101         if (isConstantStride()) {
01102           for (size_type k = 0; k < numImportLIDs; ++k) {
01103             const size_t localRow = as<size_t> (importLIDs[k]);
01104             for (size_t j = 0; j < numVecs; ++j) {
01105               Scalar &curval       = destView[localRow + myStride*j];
01106               const Scalar &newval = *impptr++;
01107               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01108             }
01109           }
01110         }
01111         else {
01112           for (size_type k = 0; k < numImportLIDs; ++k) {
01113             const size_t localRow = as<size_t> (importLIDs[k]);
01114             for (size_t j = 0; j < numVecs; ++j) {
01115               Scalar &curval       = destView[localRow + myStride*whichVectors_[j]];
01116               const Scalar &newval = *impptr++;
01117               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01118             }
01119           }
01120         }
01121       }
01122       else {
01123         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01124           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
01125           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
01126           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
01127       }
01128     }
01129   }
01130 
01131 #endif
01132 
01133   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01134   inline size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNumVectors() const {
01135     if (isConstantStride()) {
01136       return MVT::getNumCols(lclMV_);
01137     }
01138     else {
01139       return whichVectors_.size();
01140     }
01141   }
01142 
01143 
01144   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01145   void
01146   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01147   dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01148        const Teuchos::ArrayView<Scalar> &dots) const
01149   {
01150     using Teuchos::Array;
01151     using Teuchos::ArrayRCP;
01152     using Teuchos::as;
01153     using Teuchos::arcp_const_cast;
01154 
01155     const char tfecfFuncName[] = "dot()";
01156     const size_t myLen   = getLocalLength();
01157     const size_t numVecs = getNumVectors();
01158 #ifdef HAVE_TPETRA_DEBUG
01159     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01160         ": MultiVectors do not have compatible Maps:" << std::endl
01161         << "this->getMap(): " << std::endl << *this->getMap()
01162         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01163 #else
01164     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01165         ": MultiVectors do not have the same local length.");
01166 #endif
01167     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01168         ": MultiVectors must have the same number of vectors.");
01169     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(dots.size()) != numVecs, std::runtime_error,
01170         ": dots.size() must be as large as the number of vectors in *this and A.");
01171     if (isConstantStride() && A.isConstantStride()) {
01172       MVT::Dot(lclMV_,A.lclMV_,dots);
01173     }
01174     else {
01175       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01176       ArrayRCP<Scalar> vptr  = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01177       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01178       for (size_t j=0; j < numVecs; ++j) {
01179         ArrayRCP<Scalar> vj  =   getSubArrayRCP( vptr,j);
01180         ArrayRCP<Scalar> avj = A.getSubArrayRCP(avptr,j);
01181         MVT::initializeValues(a,myLen, 1, avj, myLen);
01182         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01183         dots[j] = MVT::Dot((const KMV&)v,(const KMV &)a);
01184       }
01185     }
01186     if (this->isDistributed()) {
01187       Array<Scalar> ldots(dots);
01188       Teuchos::reduceAll(*this->getMap()->getComm(),Teuchos::REDUCE_SUM,as<int>(numVecs),ldots.getRawPtr(),dots.getRawPtr());
01189     }
01190   }
01191 
01192 
01193   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01194   void
01195   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01196   norm2 (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>& norms) const
01197   {
01198     using Teuchos::arcp_const_cast;
01199     using Teuchos::Array;
01200     using Teuchos::ArrayRCP;
01201     using Teuchos::ArrayView;
01202     using Teuchos::as;
01203     using Teuchos::reduceAll;
01204     using Teuchos::REDUCE_SUM;
01205     using Teuchos::ScalarTraits;
01206     typedef typename ScalarTraits<Scalar>::magnitudeType MT;
01207     typedef ScalarTraits<MT> STM;
01208 
01209     const size_t numVecs = this->getNumVectors();
01210     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(norms.size()) != numVecs,
01211       std::runtime_error,
01212       "Tpetra::MultiVector::norm2(norms): norms.size() must be as large as the number of vectors in *this.");
01213     if (isConstantStride ()) {
01214       MVT::Norm2Squared (lclMV_,norms);
01215     }
01216     else {
01217       KMV v (MVT::getNode (lclMV_));
01218       ArrayRCP<Scalar> vi;
01219       for (size_t i=0; i < numVecs; ++i) {
01220         vi = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[i]));
01221         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vi, MVT::getStride (lclMV_));
01222         norms[i] = MVT::Norm2Squared (v);
01223       }
01224     }
01225     if (this->isDistributed ()) {
01226       Array<MT> lnorms (norms);
01227       // FIXME (mfh 25 Apr 2012) Somebody explain to me why we're
01228       // calling Teuchos::reduceAll when MultiVector has a perfectly
01229       // good reduce() function.
01230       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs),
01231                  lnorms.getRawPtr (), norms.getRawPtr ());
01232     }
01233     for (typename ArrayView<MT>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
01234       (*n) = STM::squareroot (*n);
01235     }
01236   }
01237 
01238 
01239   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01240   void
01241   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01242   normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights,
01243                 const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01244   {
01245     using Teuchos::arcp_const_cast;
01246     using Teuchos::Array;
01247     using Teuchos::ArrayRCP;
01248     using Teuchos::ArrayView;
01249     using Teuchos::as;
01250     using Teuchos::reduceAll;
01251     using Teuchos::REDUCE_SUM;
01252     using Teuchos::ScalarTraits;
01253     typedef ScalarTraits<Scalar> SCT;
01254     typedef typename SCT::magnitudeType Mag;
01255 
01256     const char tfecfFuncName[] = "normWeighted()";
01257 
01258     const Mag OneOverN = ScalarTraits<Mag>::one() / getGlobalLength();
01259     bool OneW = false;
01260     const size_t numVecs = this->getNumVectors();
01261     if (weights.getNumVectors() == 1) {
01262       OneW = true;
01263     }
01264     else {
01265       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(weights.getNumVectors() != numVecs, std::runtime_error,
01266         ": MultiVector of weights must contain either one vector or the same number of vectors as this.");
01267     }
01268 #ifdef HAVE_TPETRA_DEBUG
01269     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*weights.getMap()), std::runtime_error,
01270       ": MultiVectors do not have compatible Maps:" << std::endl
01271       << "this->getMap(): " << std::endl << *this->getMap()
01272       << "weights.getMap(): " << std::endl << *weights.getMap() << std::endl);
01273 #else
01274     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != weights.getLocalLength(), std::runtime_error,
01275       ": MultiVectors do not have the same local length.");
01276 #endif
01277     const size_t myLen = getLocalLength();
01278 
01279     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(norms.size()) != numVecs, std::runtime_error,
01280       ": norms.size() must be as large as the number of vectors in *this.");
01281     if (isConstantStride() && weights.isConstantStride()) {
01282       MVT::WeightedNorm(lclMV_,weights.lclMV_,norms);
01283     }
01284     else {
01285       KMV v(MVT::getNode(lclMV_)), w(MVT::getNode(lclMV_));
01286       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar> (MVT::getValues (lclMV_));
01287       ArrayRCP<Scalar> wptr = arcp_const_cast<Scalar> (MVT::getValues (weights.lclMV_));
01288       ArrayRCP<Scalar> wj   = wptr.persistingView (0,myLen);
01289       MVT::initializeValues (w,myLen, 1, wj, myLen);
01290       for (size_t j=0; j < numVecs; ++j) {
01291         ArrayRCP<Scalar> vj = getSubArrayRCP (vptr, j);
01292         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01293         if (! OneW) {
01294           wj = weights.getSubArrayRCP(wptr,j);
01295           MVT::initializeValues (w, myLen, 1, wj, myLen);
01296         }
01297         norms[j] = MVT::WeightedNorm ((const KMV&)v, (const KMV &)w);
01298       }
01299     }
01300     if (this->isDistributed ()) {
01301       Array<Mag> lnorms(norms);
01302       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int>(numVecs),
01303                  lnorms.getRawPtr (), norms.getRawPtr ());
01304     }
01305     for (typename ArrayView<Mag>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
01306       *n = ScalarTraits<Mag>::squareroot(*n * OneOverN);
01307     }
01308   }
01309 
01310 
01311   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01312   void
01313   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01314   norm1 (const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01315   {
01316     using Teuchos::Array;
01317     using Teuchos::ArrayRCP;
01318     using Teuchos::arcp_const_cast;
01319     using Teuchos::as;
01320     using Teuchos::reduceAll;
01321     using Teuchos::REDUCE_SUM;
01322     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
01323 
01324     const size_t numVecs = this->getNumVectors();
01325     TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(norms.size()) != numVecs, std::runtime_error,
01326         "Tpetra::MultiVector::norm1(norms): norms.size() must be as large as the number of vectors in *this.");
01327     if (isConstantStride()) {
01328       MVT::Norm1(lclMV_,norms);
01329     }
01330     else {
01331       KMV v(MVT::getNode(lclMV_));
01332       ArrayRCP<Scalar> vj;
01333       for (size_t j=0; j < numVecs; ++j) {
01334         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_,whichVectors_[j]) );
01335         MVT::initializeValues (v, MVT::getNumRows(lclMV_), 1, vj, MVT::getStride (lclMV_));
01336         norms[j] = MVT::Norm1 ((const KMV&)v);
01337       }
01338     }
01339     if (this->isDistributed ()) {
01340       Array<Mag> lnorms (norms);
01341       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs), lnorms.getRawPtr (), norms.getRawPtr ());
01342     }
01343   }
01344 
01345 
01346   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01347   void
01348   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01349   normInf (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01350   {
01351     using Teuchos::Array;
01352     using Teuchos::ArrayRCP;
01353     using Teuchos::arcp_const_cast;
01354     using Teuchos::as;
01355     using Teuchos::reduceAll;
01356     using Teuchos::REDUCE_MAX;
01357     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
01358 
01359     const size_t numVecs = this->getNumVectors();
01360     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(norms.size()) != numVecs, std::runtime_error,
01361       "Tpetra::MultiVector::normInf(norms): norms.size() must be as large as the number of vectors in *this.");
01362     if (isConstantStride ()) {
01363       MVT::NormInf(lclMV_,norms);
01364     }
01365     else {
01366       KMV v(MVT::getNode(lclMV_));
01367       ArrayRCP<Scalar> vj;
01368       for (size_t j=0; j < numVecs; ++j) {
01369         vj = Teuchos::arcp_const_cast<Scalar>( MVT::getValues(lclMV_,whichVectors_[j]) );
01370         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
01371         norms[j] = MVT::NormInf((const KMV&)v);
01372       }
01373     }
01374     if (this->isDistributed()) {
01375       Array<Mag> lnorms(norms);
01376       reduceAll (*this->getMap ()->getComm (), REDUCE_MAX, as<int> (numVecs), lnorms.getRawPtr (), norms.getRawPtr ());
01377     }
01378   }
01379 
01380 
01381   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01382   void
01383   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01384   meanValue (const Teuchos::ArrayView<Scalar> &means) const
01385   {
01386     using Teuchos::Array;
01387     using Teuchos::ArrayRCP;
01388     using Teuchos::arcp_const_cast;
01389     using Teuchos::as;
01390     using Teuchos::arcp_const_cast;
01391     using Teuchos::reduceAll;
01392     using Teuchos::REDUCE_SUM;
01393     typedef Teuchos::ScalarTraits<Scalar> SCT;
01394 
01395     const size_t numVecs = getNumVectors();
01396     const size_t myLen   = getLocalLength();
01397     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(means.size()) != numVecs, std::runtime_error,
01398       "Tpetra::MultiVector::meanValue(): means.size() must be as large as the number of vectors in *this.");
01399     // compute local components of the means
01400     // sum these across all nodes
01401     if (isConstantStride()) {
01402       MVT::Sum(lclMV_,means);
01403     }
01404     else {
01405       KMV v(MVT::getNode(lclMV_));
01406       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01407       for (size_t j=0; j < numVecs; ++j) {
01408         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j);
01409         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01410         means[j] = MVT::Sum((const KMV &)v);
01411       }
01412     }
01413     if (this->isDistributed()) {
01414       Array<Scalar> lmeans(means);
01415       // only combine if we are a distributed MV
01416       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs),
01417                  lmeans.getRawPtr (), means.getRawPtr ());
01418     }
01419     // mfh 12 Apr 2012: Don't take out the cast from the ordinal type
01420     // to the magnitude type, since operator/ (std::complex<T>, int)
01421     // isn't necessarily defined.
01422     const Scalar OneOverN =
01423       SCT::one() / as<typename SCT::magnitudeType> (getGlobalLength ());
01424     for (typename ArrayView<Scalar>::iterator i = means.begin();
01425          i != means.begin() + numVecs;
01426          ++i)
01427     {
01428       (*i) = (*i) * OneOverN;
01429     }
01430   }
01431 
01432 
01433   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01434   void
01435   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01436   randomize()
01437   {
01438     if (isConstantStride ()) {
01439       MVT::Random (lclMV_);
01440     }
01441     else {
01442       const size_t numVecs = this->getNumVectors ();
01443       KMV v (MVT::getNode (lclMV_));
01444       Teuchos::ArrayRCP<Scalar> vj;
01445       for (size_t j = 0; j < numVecs; ++j) {
01446         vj = MVT::getValuesNonConst (lclMV_, whichVectors_[j]);
01447         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01448         MVT::Random (v);
01449       }
01450     }
01451   }
01452 
01453 
01454   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01455   void
01456   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01457   putScalar (const Scalar &alpha)
01458   {
01459     const size_t numVecs = getNumVectors();
01460     if (isConstantStride()) {
01461       MVT::Init(lclMV_,alpha);
01462     }
01463     else {
01464       KMV v(MVT::getNode(lclMV_));
01465       Teuchos::ArrayRCP<Scalar> vj;
01466       for (size_t j=0; j < numVecs; ++j) {
01467         vj = MVT::getValuesNonConst(lclMV_,whichVectors_[j]);
01468         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
01469         MVT::Init(v,alpha);
01470       }
01471     }
01472   }
01473 
01474 
01475   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01476   void
01477   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01478   replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& newMap)
01479   {
01480     using Teuchos::ArrayRCP;
01481     using Teuchos::Comm;
01482     using Teuchos::RCP;
01483 
01484     // mfh 28 Mar 2013: This method doesn't forget whichVectors_, so
01485     // it might work if the MV is a column view of another MV.
01486     // However, things might go wrong when restoring the original
01487     // Map, so we don't allow this case for now.
01488     TEUCHOS_TEST_FOR_EXCEPTION(
01489       ! this->isConstantStride (), std::logic_error,
01490       "Tpetra::MultiVector::replaceMap: This method does not currently work "
01491       "if the MultiVector is a column view of another MultiVector (that is, if "
01492       "isConstantStride() == false).");
01493 
01494     // Case 1: current Map and new Map are both nonnull on this process.
01495     // Case 2: current Map is nonnull, new Map is null.
01496     // Case 3: current Map is null, new Map is nonnull.
01497     // Case 4: both Maps are null: forbidden.
01498     //
01499     // Case 1 means that we don't have to do anything on this process,
01500     // other than assign the new Map.  (We always have to do that.)
01501     // It's an error for the user to supply a Map that requires
01502     // resizing in this case.
01503     //
01504     // Case 2 means that the calling process is in the current Map's
01505     // communicator, but will be excluded from the new Map's
01506     // communicator.  We don't have to do anything on the calling
01507     // process; just leave whatever data it may have alone.
01508     //
01509     // Case 3 means that the calling process is excluded from the
01510     // current Map's communicator, but will be included in the new
01511     // Map's communicator.  This means we need to (re)allocate the
01512     // local (KokkosClassic::)MultiVector if it does not have the right
01513     // number of rows.  If the new number of rows is nonzero, we'll
01514     // fill the newly allocated local data with zeros, as befits a
01515     // projection operation.
01516     //
01517     // The typical use case for Case 3 is that the MultiVector was
01518     // first created with the Map with more processes, then that Map
01519     // was replaced with a Map with fewer processes, and finally the
01520     // original Map was restored on this call to replaceMap.
01521 
01522 #ifdef HAVE_TEUCHOS_DEBUG
01523     // mfh 28 Mar 2013: We can't check for compatibility across the
01524     // whole communicator, unless we know that the current and new
01525     // Maps are nonnull on _all_ participating processes.
01526     // TEUCHOS_TEST_FOR_EXCEPTION(
01527     //   origNumProcs == newNumProcs && ! this->getMap ()->isCompatible (*map),
01528     //   std::invalid_argument, "Tpetra::MultiVector::project: "
01529     //   "If the input Map's communicator is compatible (has the same number of "
01530     //   "processes as) the current Map's communicator, then the two Maps must be "
01531     //   "compatible.  The replaceMap() method is not for data redistribution; "
01532     //   "use Import or Export for that purpose.");
01533 
01534     // TODO (mfh 28 Mar 2013) Add compatibility checks for projections
01535     // of the Map, in case the process counts don't match.
01536 #endif // HAVE_TEUCHOS_DEBUG
01537 
01538     if (this->getMap ().is_null ()) { // current Map is null
01539       TEUCHOS_TEST_FOR_EXCEPTION(
01540         newMap.is_null (), std::invalid_argument,
01541         "Tpetra::MultiVector::replaceMap: both current and new Maps are null.  "
01542         "This probably means that the input Map is incorrect.");
01543       // Case 3: current Map is null, new Map is nonnull.
01544       const size_t newNumRows = newMap->getNodeNumElements ();
01545       const size_t origNumRows = lclMV_.getNumRows ();
01546       const size_t numCols = getNumVectors ();
01547 
01548       if (origNumRows != newNumRows) {
01549         RCP<Node> node = newMap->getNode ();
01550         ArrayRCP<Scalar> data = newNumRows == 0 ? Teuchos::null :
01551           node->template allocBuffer<Scalar> (newNumRows * numCols);
01552         const size_t stride = newNumRows;
01553         MVT::initializeValues (lclMV_, newNumRows, numCols, data, stride);
01554         if (newNumRows > 0) {
01555           MVT::Init (lclMV_, Teuchos::ScalarTraits<Scalar>::zero ());
01556         }
01557       }
01558     }
01559     else if (newMap.is_null ()) { // Case 2: current Map is nonnull, new Map is null
01560       // I am an excluded process.  Reinitialize my data so that I
01561       // have 0 rows.  Keep the number of columns as before.
01562       const size_t numVecs = getNumVectors ();
01563       MVT::initializeValues (lclMV_, 0, numVecs, Teuchos::null, 0);
01564     }
01565     this->map_ = newMap;
01566   }
01567 
01568 
01569   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01570   void
01571   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01572   scale (const Scalar &alpha)
01573   {
01574     using Teuchos::arcp_const_cast;
01575     using Teuchos::ArrayRCP;
01576 
01577     // NOTE: can't substitute putScalar(0.0) for scale(0.0), because
01578     //       the former will overwrite NaNs present in the MultiVector, while the
01579     //       semantics of this call require multiplying them by 0, which IEEE requires to be NaN
01580     const size_t numVecs = getNumVectors();
01581     if (alpha == Teuchos::ScalarTraits<Scalar>::one()) {
01582       // do nothing
01583     }
01584     else if (isConstantStride()) {
01585       MVT::Scale(lclMV_,alpha);
01586     }
01587     else {
01588       KMV v(MVT::getNode(lclMV_));
01589       ArrayRCP<Scalar> vj;
01590       for (size_t j=0; j < numVecs; ++j) {
01591         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_, whichVectors_[j]));
01592         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01593         MVT::Scale (v, alpha);
01594       }
01595     }
01596   }
01597 
01598 
01599   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01600   void
01601   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01602   scale (Teuchos::ArrayView<const Scalar> alphas)
01603   {
01604     using Teuchos::arcp_const_cast;
01605     using Teuchos::ArrayRCP;
01606     using Teuchos::as;
01607 
01608     const size_t numVecs = this->getNumVectors();
01609     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(alphas.size()) != numVecs, std::runtime_error,
01610       "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as the number of vectors in *this.");
01611     KMV vec(MVT::getNode(lclMV_));
01612     const size_t myLen = MVT::getNumRows(lclMV_);
01613     if (myLen == 0) return;
01614     ArrayRCP<Scalar> mybuf = MVT::getValuesNonConst(lclMV_);
01615     for (size_t j = 0; j < numVecs; ++j) {
01616       if (alphas[j] == Teuchos::ScalarTraits<Scalar>::one()) {
01617         // do nothing: NaN * 1.0 == NaN, Number*1.0 == Number
01618       }
01619       else {
01620         ArrayRCP<Scalar> mybufj = getSubArrayRCP(mybuf,j);
01621         MVT::initializeValues(vec,myLen,1,mybufj,myLen);
01622         MVT::Scale(vec,alphas[j]);
01623       }
01624     }
01625   }
01626 
01627 
01628   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01629   void
01630   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01631   scale (const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01632   {
01633     using Teuchos::arcp_const_cast;
01634     using Teuchos::ArrayRCP;
01635     using Teuchos::as;
01636 
01637     const char tfecfFuncName[] = "scale(alpha,A)";
01638 
01639     const size_t numVecs = getNumVectors(),
01640                  myLen   = getLocalLength();
01641 #ifdef HAVE_TPETRA_DEBUG
01642     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! this->getMap ()->isCompatible (*A.getMap ()),
01643       std::runtime_error, ": MultiVectors do not have compatible Maps:" << std::endl
01644         << "this->getMap(): " << std::endl << *(this->getMap ())
01645         << "A.getMap(): " << std::endl << *(A.getMap ()) << std::endl);
01646 #else
01647     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01648       ": MultiVectors do not have the same local length.");
01649 #endif
01650     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01651       ": MultiVectors must have the same number of vectors.");
01652     if (isConstantStride() && A.isConstantStride()) {
01653       // set me == alpha*A
01654       MVT::Scale(lclMV_,alpha,(const KMV&)A.lclMV_);
01655     }
01656     else {
01657       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01658       ArrayRCP<Scalar> vptr  = MVT::getValuesNonConst (lclMV_);
01659       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar> (MVT::getValues (A.lclMV_));
01660       for (size_t j=0; j < numVecs; ++j) {
01661         ArrayRCP<Scalar>  vj =   getSubArrayRCP (vptr,  j);
01662         ArrayRCP<Scalar> avj = A.getSubArrayRCP (avptr, j);
01663         MVT::initializeValues (a,myLen, 1, avj, myLen);
01664         MVT::initializeValues (v,myLen, 1,  vj, myLen);
01665         MVT::Scale (v, alpha, (const KMV &)a);
01666       }
01667     }
01668   }
01669 
01670 
01671   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01672   void
01673   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01674   reciprocal (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01675   {
01676     using Teuchos::arcp_const_cast;
01677     using Teuchos::ArrayRCP;
01678     using Teuchos::as;
01679 
01680     const char tfecfFuncName[] = "reciprocal()";
01681 
01682 #ifdef HAVE_TPETRA_DEBUG
01683     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01684         ": MultiVectors do not have compatible Maps:" << std::endl
01685         << "this->getMap(): " << std::endl << *this->getMap()
01686         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01687 #else
01688     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01689         ": MultiVectors do not have the same local length.");
01690 #endif
01691     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01692         ": MultiVectors must have the same number of vectors.");
01693     const size_t numVecs = getNumVectors();
01694     const size_t myLen = getLocalLength();
01695     try {
01696       if (isConstantStride() && A.isConstantStride()) {
01697         MVT::Recip(lclMV_,(const KMV&)A.lclMV_);
01698       }
01699       else {
01700         KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01701         ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01702                         avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01703         for (size_t j=0; j < numVecs; ++j) {
01704           ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01705                           avj = A.getSubArrayRCP(avptr,j);
01706           MVT::initializeValues(a,myLen, 1, avj, myLen);
01707           MVT::initializeValues(v,myLen, 1,  vj, myLen);
01708           MVT::Recip(v,(const KMV &)a);
01709         }
01710       }
01711     }
01712     catch (std::runtime_error &e) {
01713       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
01714           ": caught exception from Kokkos:" << std::endl
01715           << e.what() << std::endl);
01716     }
01717   }
01718 
01719 
01720   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01721   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::abs(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A) {
01722     using Teuchos::arcp_const_cast;
01723     using Teuchos::ArrayRCP;
01724     using Teuchos::as;
01725 
01726     const char tfecfFuncName[] = "abs()";
01727 #ifdef HAVE_TPETRA_DEBUG
01728     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01729         ": MultiVectors do not have compatible Maps:" << std::endl
01730         << "this->getMap(): " << std::endl << *this->getMap()
01731         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01732 #else
01733     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01734         ": MultiVectors do not have the same local length.");
01735 #endif
01736     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01737         ": MultiVectors must have the same number of vectors.");
01738     const size_t myLen = getLocalLength();
01739     const size_t numVecs = getNumVectors();
01740     if (isConstantStride() && A.isConstantStride()) {
01741       MVT::Abs(lclMV_,(const KMV&)A.lclMV_);
01742     }
01743     else {
01744       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01745       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01746                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01747       for (size_t j=0; j < numVecs; ++j) {
01748         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01749                         avj = A.getSubArrayRCP(avptr,j);
01750         MVT::initializeValues(a,myLen, 1, avj, myLen);
01751         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01752         MVT::Abs(v,(const KMV &)a);
01753       }
01754     }
01755   }
01756 
01757 
01758   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01759   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01760                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01761                       const Scalar &beta)
01762   {
01763     using Teuchos::arcp_const_cast;
01764     using Teuchos::ArrayRCP;
01765     using Teuchos::as;
01766 
01767     const char tfecfFuncName[] = "update()";
01768     // this = beta*this + alpha*A
01769     // must support case where &this == &A
01770     // can't short circuit on alpha==0.0 or beta==0.0, because 0.0*NaN != 0.0
01771 #ifdef HAVE_TPETRA_DEBUG
01772     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01773         ": MultiVectors do not have compatible Maps:" << std::endl
01774         << "this->getMap(): " << std::endl << this->getMap()
01775         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01776 #else
01777     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01778         ": MultiVectors do not have the same local length.");
01779 #endif
01780     const size_t myLen = getLocalLength();
01781     const size_t numVecs = getNumVectors();
01782     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01783         ": MultiVectors must have the same number of vectors.");
01784     if (isConstantStride() && A.isConstantStride()) {
01785       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta);
01786     }
01787     else {
01788       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01789       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01790                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01791       for (size_t j=0; j < numVecs; ++j) {
01792         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01793                         avj = A.getSubArrayRCP(avptr,j);
01794         MVT::initializeValues(a,myLen, 1, avj, myLen);
01795         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01796         MVT::GESUM(v,alpha,(const KMV &)a,beta);
01797       }
01798     }
01799   }
01800 
01801 
01802   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01803   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01804                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01805                       const Scalar &beta,  const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &B,
01806                       const Scalar &gamma)
01807   {
01808     using Teuchos::arcp_const_cast;
01809     using Teuchos::ArrayRCP;
01810     using Teuchos::as;
01811 
01812     const char tfecfFuncName[] = "update()";
01813     // this = alpha*A + beta*B + gamma*this
01814     // must support case where &this == &A or &this == &B
01815     // can't short circuit on alpha==0.0 or beta==0.0 or gamma==0.0, because 0.0*NaN != 0.0
01816 #ifdef HAVE_TPETRA_DEBUG
01817     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()) || !this->getMap()->isCompatible(*B.getMap()),
01818         std::runtime_error,
01819         ": MultiVectors do not have compatible Maps:" << std::endl
01820         << "this->getMap(): " << std::endl << *this->getMap()
01821         << "A.getMap(): " << std::endl << *A.getMap() << std::endl
01822         << "B.getMap(): " << std::endl << *B.getMap() << std::endl);
01823 #else
01824     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength() || getLocalLength() != B.getLocalLength(), std::runtime_error,
01825         ": MultiVectors do not have the same local length.");
01826 #endif
01827     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors() || B.getNumVectors() != this->getNumVectors(), std::runtime_error,
01828         ": MultiVectors must have the same number of vectors.");
01829     const size_t myLen = getLocalLength();
01830     const size_t numVecs = getNumVectors();
01831     if (isConstantStride() && A.isConstantStride() && B.isConstantStride()) {
01832       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta,(const KMV&)B.lclMV_,gamma);
01833     }
01834     else {
01835       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_)), b(MVT::getNode(lclMV_));
01836       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01837                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_)),
01838                       bvptr = arcp_const_cast<Scalar>(MVT::getValues(B.lclMV_));
01839       for (size_t j=0; j < numVecs; ++j) {
01840         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01841                         avj = A.getSubArrayRCP(avptr,j),
01842                         bvj = B.getSubArrayRCP(bvptr,j);
01843         MVT::initializeValues(b,myLen, 1, bvj, myLen);
01844         MVT::initializeValues(a,myLen, 1, avj, myLen);
01845         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01846         MVT::GESUM(v,alpha,(const KMV&)a,beta,(const KMV&)b,gamma);
01847       }
01848     }
01849   }
01850 
01851 
01852   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01853   Teuchos::ArrayRCP<const Scalar>
01854   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01855   getData (size_t j) const
01856   {
01857     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01858     KOKKOS_NODE_TRACE("MultiVector::getData()")
01859     return node->template viewBuffer<Scalar> (getLocalLength (), getSubArrayRCP (MVT::getValues(lclMV_), j));
01860   }
01861 
01862 
01863   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01864   Teuchos::ArrayRCP<Scalar>
01865   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01866   getDataNonConst(size_t j)
01867   {
01868     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01869     KOKKOS_NODE_TRACE("MultiVector::getDataNonConst()")
01870     return node->template viewBufferNonConst<Scalar> (KokkosClassic::ReadWrite, getLocalLength (), getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j));
01871   }
01872 
01873 
01874   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01875   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>&
01876   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01877   operator= (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &source)
01878   {
01879 #ifdef HAVE_TPETRA_DEBUG
01880     using Teuchos::outArg;
01881     using Teuchos::REDUCE_MIN;
01882     using Teuchos::reduceAll;
01883     using std::endl;
01884 #endif // HAVE_TPETRA_DEBUG
01885 
01886     const char tfecfFuncName[] = "operator=";
01887     // Check for special case of this=Source, in which case we do nothing.
01888     if (this != &source) {
01889       // Whether the input and *this are compatible on the calling process.
01890       const int locallyCompat =
01891         (this->getLocalLength () == source.getLocalLength ()) ? 1 : 0;
01892 #ifdef HAVE_TPETRA_DEBUG
01893       int globallyCompat = 1; // innocent until proven guilty
01894       reduceAll<int, int> (* (this->getMap ()->getComm ()), REDUCE_MIN,
01895                            locallyCompat, outArg (globallyCompat));
01896       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01897         globallyCompat == 0, std::invalid_argument,
01898         ": MultiVectors do not have the same local length on all processes.");
01899 #else
01900       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01901         locallyCompat == 0, std::invalid_argument,
01902         ": MultiVectors do not have the same local length on the calling "
01903         "process " << this->getMap ()->getComm ()->getSize () << ".  *this "
01904         "has " << this->getLocalLength () << " local rows, and source has "
01905         << source.getLocalLength () << " rows.");
01906 #endif // HAVE_TPETRA_DEBUG
01907 
01908       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01909         source.getNumVectors() != getNumVectors(), std::invalid_argument,
01910         ": MultiVectors must have the same number of vectors.");
01911 
01912       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
01913       const size_t numVecs = getNumVectors();
01914       if (isConstantStride () && source.isConstantStride () &&
01915           getLocalLength () == getStride () &&
01916           source.getLocalLength ()== source.getStride ()) {
01917         // Both multivectors' data are stored contiguously, so we can
01918         // copy in one call.
01919         KOKKOS_NODE_TRACE("MultiVector::operator=()")
01920         node->template copyBuffers<Scalar> (getLocalLength () * numVecs,
01921                                             MVT::getValues (source.lclMV_),
01922                                             MVT::getValuesNonConst (lclMV_));
01923       }
01924       else {
01925         // We have to copy the columns one at a time.
01926         for (size_t j=0; j < numVecs; ++j) {
01927           KOKKOS_NODE_TRACE("MultiVector::operator=()")
01928           node->template copyBuffers<Scalar> (getLocalLength (),
01929                                               source.getSubArrayRCP (MVT::getValues (source.lclMV_), j),
01930                                               getSubArrayRCP (MVT::getValuesNonConst(lclMV_), j));
01931         }
01932       }
01933     }
01934     return *this;
01935   }
01936 
01937 
01938   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01939   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01940   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01941   subCopy (const Teuchos::ArrayView<const size_t> &cols) const
01942   {
01943     using Teuchos::RCP;
01944     using Teuchos::rcp;
01945 
01946     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() < 1, std::runtime_error,
01947         "Tpetra::MultiVector::subCopy(cols): cols must contain at least one column.");
01948     size_t numCopyVecs = cols.size();
01949     const bool zeroData = false;
01950     RCP<Node> node = MVT::getNode(lclMV_);
01951     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
01952     // mv is allocated with constant stride
01953     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
01954     // copy data from *this into mv
01955     for (size_t j=0; j<numCopyVecs; ++j) {
01956       KOKKOS_NODE_TRACE("MultiVector::subCopy()")
01957       node->template copyBuffers<Scalar> (getLocalLength (),
01958                                           getSubArrayRCP (MVT::getValues (lclMV_), cols[j]),
01959                                           MVT::getValuesNonConst (mv->lclMV_, j));
01960     }
01961     return mv;
01962   }
01963 
01964 
01965   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01966   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01967   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01968   subCopy (const Teuchos::Range1D &colRng) const
01969   {
01970     using Teuchos::RCP;
01971     using Teuchos::rcp;
01972 
01973     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
01974         "Tpetra::MultiVector::subCopy(Range1D): range must include at least one vector.");
01975     size_t numCopyVecs = colRng.size();
01976     const bool zeroData = false;
01977     RCP<Node> node = MVT::getNode(lclMV_);
01978     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
01979     // mv is allocated with constant stride
01980     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
01981     // copy data from *this into mv
01982     for (size_t js=colRng.lbound(), jd=0; jd<numCopyVecs; ++jd, ++js) {
01983       KOKKOS_NODE_TRACE("MultiVector::subCopy()")
01984       node->template copyBuffers<Scalar> (getLocalLength (),
01985                                           getSubArrayRCP (MVT::getValues (lclMV_), js),
01986                                           MVT::getValuesNonConst (mv->lclMV_, jd));
01987     }
01988     return mv;
01989   }
01990 
01991 
01992   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01993   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01994   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01995   offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
01996               size_t offset) const
01997   {
01998     using Teuchos::RCP;
01999     using Teuchos::rcp;
02000     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02001 
02002     const size_t newNumRows = subMap->getNodeNumElements();
02003     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02004     if (tooManyElts) {
02005       const int myRank = this->getMap ()->getComm ()->getRank ();
02006       TEUCHOS_TEST_FOR_EXCEPTION(
02007         newNumRows + offset > MVT::getNumRows (lclMV_),
02008         std::runtime_error,
02009         "Tpetra::MultiVector::offsetView: Invalid input Map.  Input Map owns "
02010         << subMap->getNodeNumElements() << " elements on process " << myRank
02011         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02012         << lclMV_.getOrigNumRows () << " on this process.");
02013     }
02014     RCP<const MV> constViewMV;
02015     if (isConstantStride()) {
02016       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02017         lclMV_.offsetView (newNumRows, lclMV_.getNumCols (), offset, 0);
02018       constViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02019     }
02020     else {
02021       // Compute the max column being viewed.  This tells us where to stop the view.
02022       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02023       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02024         lclMV_.offsetView (newNumRows, maxCol+1, offset, 0);
02025       constViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02026     }
02027     return constViewMV;
02028   }
02029 
02030 
02031   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02032   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02033   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02034   offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
02035                       size_t offset)
02036   {
02037     using Teuchos::RCP;
02038     using Teuchos::rcp;
02039     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02040 
02041     const size_t newNumRows = subMap->getNodeNumElements();
02042     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02043     if (tooManyElts) {
02044       const int myRank = this->getMap ()->getComm ()->getRank ();
02045       TEUCHOS_TEST_FOR_EXCEPTION(
02046         newNumRows + offset > MVT::getNumRows (lclMV_),
02047         std::runtime_error,
02048         "Tpetra::MultiVector::offsetViewNonconst: Invalid input Map.  Input Map owns "
02049         << subMap->getNodeNumElements() << " elements on process " << myRank
02050         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02051         << lclMV_.getOrigNumRows () << " on this process.");
02052     }
02053     RCP<MV> subViewMV;
02054     if (isConstantStride()) {
02055       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02056         lclMV_.offsetViewNonConst (newNumRows, lclMV_.getNumCols (), offset, 0);
02057       subViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02058     }
02059     else {
02060       // Compute the max column being viewed.  This tells us where to stop the view.
02061       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02062       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02063         lclMV_.offsetViewNonConst (newNumRows, maxCol+1, offset, 0);
02064       subViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02065     }
02066     return subViewMV;
02067   }
02068 
02069 
02070   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02071   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02072   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02073   subView (const ArrayView<const size_t> &cols) const
02074   {
02075     using Teuchos::arcp_const_cast;
02076     using Teuchos::Array;
02077     using Teuchos::ArrayRCP;
02078     using Teuchos::RCP;
02079     using Teuchos::rcp;
02080     using Teuchos::rcp_const_cast;
02081     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02082 
02083     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
02084       "Tpetra::MultiVector::subView(ArrayView): range must include at least one vector.");
02085     // this is const, so the lclMV_ is const, so that we can only get const buffers
02086     // we will cast away the const; this is okay, because
02087     //   a) the constructor doesn't modify the data, and
02088     //   b) we are encapsulating in a const MV before returning
02089     const size_t myStride = MVT::getStride(lclMV_),
02090                  myLen    = MVT::getNumRows(lclMV_),
02091               numViewCols = cols.size();
02092     // use the smallest view possible of the buffer: from the first element of the minInd vector to the last element of the maxInd vector
02093     // this minimizes overlap between views, and keeps view of the minimum amount necessary, in order to allow node to achieve maximum efficiency.
02094     // adjust the indices appropriately; shift so that smallest index is 0
02095     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02096     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar> (cbuf);
02097     Array<size_t> newCols (numViewCols);
02098     size_t minInd = Teuchos::OrdinalTraits<size_t>::max();
02099     size_t maxInd = Teuchos::OrdinalTraits<size_t>::zero();
02100     if (isConstantStride()) {
02101       for (size_t j=0; j < numViewCols; ++j) {
02102         newCols[j] = cols[j];
02103         if (newCols[j] < minInd) minInd = newCols[j];
02104         if (maxInd < newCols[j]) maxInd = newCols[j];
02105       }
02106     }
02107     else {
02108       for (size_t j=0; j < numViewCols; ++j) {
02109         newCols[j] = whichVectors_[cols[j]];
02110         if (newCols[j] < minInd) minInd = newCols[j];
02111         if (maxInd < newCols[j]) maxInd = newCols[j];
02112       }
02113     }
02114     ArrayRCP<Scalar> minbuf = ncbuf.persistingView(minInd * myStride, myStride * (maxInd - minInd) + myLen);
02115     for (size_t j=0; j < numViewCols; ++j) {
02116       newCols[j] -= minInd;
02117     }
02118     RCP<const MV> constViewMV;
02119     return rcp_const_cast<const MV> (rcp (new MV (this->getMap (), minbuf, myStride, newCols (), COMPUTE_VIEW_CONSTRUCTOR)));
02120   }
02121 
02122 
02123   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02124   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02125   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02126   subView (const Teuchos::Range1D &colRng) const
02127   {
02128     using Teuchos::arcp_const_cast;
02129     using Teuchos::Array;
02130     using Teuchos::ArrayRCP;
02131     using Teuchos::RCP;
02132     using Teuchos::rcp;
02133     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02134 
02135     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02136       "Tpetra::MultiVector::subView(Range1D): range must include at least one vector.");
02137     size_t numViewVecs = colRng.size();
02138     // this is const, so the lclMV_ is const, so that we can only get const buffers
02139     // we will cast away the const; this is okay, because
02140     //   a) the constructor doesn't modify the data, and
02141     //   b) we are encapsulating in a const MV before returning
02142     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02143     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar>(cbuf);
02144     // resulting MultiVector is constant stride only if *this is
02145     if (isConstantStride()) {
02146       // view goes from first entry of first vector to last entry of last vector
02147       ArrayRCP<Scalar> subdata = ncbuf.persistingView( MVT::getStride(lclMV_) * colRng.lbound(),
02148                                                        MVT::getStride(lclMV_) * (numViewVecs-1) + getLocalLength() );
02149       return rcp (new MV (this->getMap (), subdata, MVT::getStride (lclMV_), numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02150     }
02151     // otherwise, use a subset of this whichVectors_ to construct new multivector
02152     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02153     return rcp (new MV (this->getMap (), ncbuf, MVT::getStride (lclMV_), whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02154   }
02155 
02156 
02157   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02158   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02159   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02160   subViewNonConst (const ArrayView<const size_t> &cols)
02161   {
02162     using Teuchos::as;
02163     using Teuchos::arcp_const_cast;
02164     using Teuchos::Array;
02165     using Teuchos::ArrayRCP;
02166     using Teuchos::RCP;
02167     using Teuchos::rcp;
02168     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02169 
02170     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
02171       "Tpetra::MultiVector::subViewNonConst(ArrayView): range must include at least one vector.");
02172     if (isConstantStride()) {
02173       return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), cols, COMPUTE_VIEW_CONSTRUCTOR));
02174     }
02175     // else, lookup current whichVectors_ using cols
02176     Array<size_t> newcols(cols.size());
02177     for (size_t j = 0; j < as<size_t> (cols.size ()); ++j) {
02178       newcols[j] = whichVectors_[cols[j]];
02179     }
02180     return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), newcols (), COMPUTE_VIEW_CONSTRUCTOR));
02181   }
02182 
02183 
02184   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02185   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02186   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02187   subViewNonConst (const Teuchos::Range1D &colRng)
02188   {
02189     using Teuchos::as;
02190     using Teuchos::arcp_const_cast;
02191     using Teuchos::Array;
02192     using Teuchos::ArrayRCP;
02193     using Teuchos::RCP;
02194     using Teuchos::rcp;
02195     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02196 
02197     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02198       "Tpetra::MultiVector::subViewNonConst(Range1D): range must include at least one vector.");
02199     size_t numViewVecs = colRng.size();
02200     // resulting MultiVector is constant stride only if *this is
02201     if (isConstantStride ()) {
02202       // view goes from first entry of first vector to last entry of last vector
02203       const size_t stride = MVT::getStride(lclMV_);
02204       ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02205       ArrayRCP<Scalar> subdata = data.persistingView( stride * colRng.lbound(),
02206                                                       stride * (numViewVecs-1) + getLocalLength() );
02207       return rcp (new MV (this->getMap (), subdata, stride, numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02208     }
02209     // otherwise, use a subset of this whichVectors_ to construct new multivector
02210     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02211     const size_t stride = MVT::getStride(lclMV_);
02212     ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02213     return rcp (new MV (this->getMap (), data, stride, whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02214   }
02215 
02216 
02217   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02218   Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02219   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02220   getVector (size_t j) const
02221   {
02222     using Teuchos::arcp_const_cast;
02223     using Teuchos::ArrayRCP;
02224     using Teuchos::rcp;
02225     using Teuchos::rcp_const_cast;
02226     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02227 
02228 #ifdef HAVE_TPETRA_DEBUG
02229     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02230         "Tpetra::MultiVector::getVector(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02231 #endif
02232     // this is const, so lclMV_ is const, so we get const buff
02233     // it is safe to cast away the const because we will wrap it in a const Vector below
02234     ArrayRCP<Scalar> ncbuff;
02235     if (getLocalLength() > 0) {
02236       ArrayRCP<const Scalar> cbuff = getSubArrayRCP(MVT::getValues(lclMV_),j);
02237       ncbuff = arcp_const_cast<Scalar>(cbuff);
02238     }
02239     return rcp_const_cast<const V> (rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR)));
02240   }
02241 
02242 
02243   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02244   Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02245   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getVectorNonConst(size_t j)
02246   {
02247     using Teuchos::ArrayRCP;
02248     using Teuchos::rcp;
02249     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02250 
02251 #ifdef HAVE_TPETRA_DEBUG
02252     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02253         "Tpetra::MultiVector::getVectorNonConst(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02254 #endif
02255     ArrayRCP<Scalar> ncbuff;
02256     if (getLocalLength() > 0) {
02257       ncbuff = getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j);
02258     }
02259     return rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR));
02260   }
02261 
02262 
02263   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02264   void
02265   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02266   get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const
02267   {
02268     using Teuchos::ArrayRCP;
02269     using Teuchos::ArrayView;
02270     using Teuchos::RCP;
02271     using Teuchos::rcp;
02272 
02273     const char tfecfFuncName[] = "get1dCopy(A,LDA)";
02274     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < getLocalLength(), std::runtime_error,
02275       ": specified stride is not large enough for local vector length.");
02276     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(A.size()) < LDA*(getNumVectors()-1)+getLocalLength(), std::runtime_error,
02277       ": specified stride/storage is not large enough for the number of vectors.");
02278     RCP<Node> node = MVT::getNode(lclMV_);
02279     const size_t myStride = MVT::getStride(lclMV_),
02280                   numCols = getNumVectors(),
02281                   myLen   = getLocalLength();
02282     if (myLen > 0) {
02283       ArrayRCP<const Scalar> mydata = MVT::getValues(lclMV_);
02284       KOKKOS_NODE_TRACE("MultiVector::getVectorNonConst()")
02285       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,mydata);
02286       typename ArrayView<Scalar>::iterator Aptr = A.begin();
02287       for (size_t j=0; j<numCols; j++) {
02288         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02289         std::copy(myviewj,myviewj+myLen,Aptr);
02290         Aptr += LDA;
02291       }
02292       myview = Teuchos::null;
02293       mydata = Teuchos::null;
02294     }
02295   }
02296 
02297 
02298   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02299   void
02300   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02301   get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const
02302   {
02303     using Teuchos::as;
02304     using Teuchos::ArrayRCP;
02305     using Teuchos::ArrayView;
02306     using Teuchos::RCP;
02307     using Teuchos::rcp;
02308 
02309     const char tfecfFuncName[] = "get2dCopy(ArrayOfPtrs)";
02310     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(ArrayOfPtrs.size()) != getNumVectors(), std::runtime_error,
02311         ": Array of pointers must contain as many pointers as the MultiVector has rows.");
02312     RCP<Node> node = MVT::getNode(lclMV_);
02313     const size_t numCols = getNumVectors(),
02314                    myLen = getLocalLength();
02315     if (myLen > 0) {
02316       ArrayRCP<const Scalar> mybuff = MVT::getValues(lclMV_);
02317       KOKKOS_NODE_TRACE("MultiVector::get2dCopy()")
02318       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(mybuff.size(), mybuff);
02319       for (size_t j=0; j<numCols; ++j) {
02320 #ifdef HAVE_TPETRA_DEBUG
02321         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(), std::runtime_error,
02322           ": The ArrayView provided in ArrayOfPtrs[" << j << "] was not large enough to contain the local entries.");
02323 #endif
02324         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02325         std::copy(myviewj,myviewj+myLen,ArrayOfPtrs[j].begin());
02326       }
02327       myview = Teuchos::null;
02328       mybuff = Teuchos::null;
02329     }
02330   }
02331 
02332 
02333   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02334   Teuchos::ArrayRCP<const Scalar>
02335   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dView () const
02336   {
02337     TEUCHOS_TEST_FOR_EXCEPTION(!isConstantStride(), std::runtime_error,
02338       "Tpetra::MultiVector::get1dView() requires that this MultiVector have constant stride.");
02339     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
02340     KOKKOS_NODE_TRACE("MultiVector::get1dView()")
02341     return node->template viewBuffer<Scalar>( getStride()*(getNumVectors()-1)+getLocalLength(), MVT::getValues(lclMV_) );
02342   }
02343 
02344 
02345   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02346   Teuchos::ArrayRCP<Scalar>
02347   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dViewNonConst ()
02348   {
02349     TEUCHOS_TEST_FOR_EXCEPTION(!isConstantStride(), std::runtime_error,
02350       "Tpetra::MultiVector::get1dViewNonConst(): requires that this MultiVector have constant stride.");
02351     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
02352     KOKKOS_NODE_TRACE("MultiVector::get1dViewNonConst()")
02353     return node->template viewBufferNonConst<Scalar>( KokkosClassic::ReadWrite, getStride()*(getNumVectors()-1)+getLocalLength(), MVT::getValuesNonConst(lclMV_) );
02354   }
02355 
02356 
02357   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02358   Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
02359   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dViewNonConst()
02360   {
02361     using Teuchos::arcp;
02362     using Teuchos::ArrayRCP;
02363     using Teuchos::RCP;
02364 
02365     RCP<Node> node = MVT::getNode(lclMV_);
02366     ArrayRCP<ArrayRCP<Scalar> > views = arcp<ArrayRCP<Scalar> > (getNumVectors ());
02367     if (isConstantStride ()) {
02368       const size_t myStride = MVT::getStride(lclMV_),
02369                     numCols = getNumVectors(),
02370                     myLen   = getLocalLength();
02371       if (myLen > 0) {
02372         KOKKOS_NODE_TRACE("MultiVector::get2dViewNonConst()")
02373         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02374         for (size_t j=0; j<numCols; ++j) {
02375           views[j] = myview.persistingView(0,myLen);
02376           myview += myStride;
02377         }
02378       }
02379     }
02380     else {
02381       const size_t myStride = MVT::getStride(lclMV_),
02382                     numCols = MVT::getNumCols(lclMV_),
02383                      myCols = getNumVectors(),
02384                      myLen  = MVT::getNumRows(lclMV_);
02385       if (myLen > 0) {
02386         KOKKOS_NODE_TRACE("MultiVector::get2dViewNonConst()")
02387         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02388         for (size_t j=0; j<myCols; ++j) {
02389           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02390         }
02391       }
02392     }
02393     return views;
02394   }
02395 
02396 
02397   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02398   Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
02399   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dView() const
02400   {
02401     using Teuchos::arcp;
02402     using Teuchos::ArrayRCP;
02403     using Teuchos::RCP;
02404 
02405     RCP<Node> node = MVT::getNode(lclMV_);
02406     ArrayRCP< ArrayRCP<const Scalar> > views = arcp<ArrayRCP<const Scalar> >(getNumVectors());
02407     if (isConstantStride()) {
02408       const size_t myStride = MVT::getStride(lclMV_),
02409                     numCols = getNumVectors(),
02410                     myLen   = getLocalLength();
02411       if (myLen > 0) {
02412         KOKKOS_NODE_TRACE("MultiVector::get2dView()")
02413         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02414         for (size_t j=0; j<numCols; ++j) {
02415           views[j] = myview.persistingView(0,myLen);
02416           myview += myStride;
02417         }
02418       }
02419     }
02420     else {
02421       const size_t myStride = MVT::getStride(lclMV_),
02422                     numCols = MVT::getNumCols(lclMV_),
02423                      myCols = getNumVectors(),
02424                      myLen  = MVT::getNumRows(lclMV_);
02425       if (myLen > 0) {
02426         KOKKOS_NODE_TRACE("MultiVector::get2dView()")
02427         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02428         for (size_t j=0; j<myCols; ++j) {
02429           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02430         }
02431       }
02432     }
02433     return views;
02434   }
02435 
02436 
02437   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02438   void
02439   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02440   multiply (Teuchos::ETransp transA,
02441             Teuchos::ETransp transB,
02442             const Scalar &alpha,
02443             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02444             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02445             const Scalar &beta)
02446   {
02447     using Teuchos::NO_TRANS;      // enums
02448     using Teuchos::TRANS;
02449     using Teuchos::CONJ_TRANS;
02450     using Teuchos::null;
02451     using Teuchos::ScalarTraits;  // traits
02452     using Teuchos::as;
02453     using Teuchos::RCP;
02454     using Teuchos::rcp;
02455     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02456 
02457     // This routine performs a variety of matrix-matrix multiply operations, interpreting
02458     // the MultiVector (this-aka C , A and B) as 2D matrices.  Variations are due to
02459     // the fact that A, B and C can be local replicated or global distributed
02460     // MultiVectors and that we may or may not operate with the transpose of
02461     // A and B.  Possible cases are:
02462     //                                       Num
02463     //      OPERATIONS                        cases  Notes
02464     //  1) C(local) = A^X(local) * B^X(local)  4    (X=Trans or Not, No comm needed)
02465     //  2) C(local) = A^T(distr) * B  (distr)  1    (2D dot product, replicate C)
02466     //  3) C(distr) = A  (distr) * B^X(local)  2    (2D vector update, no comm needed)
02467     //
02468     // The following operations are not meaningful for 1D distributions:
02469     //
02470     // u1) C(local) = A^T(distr) * B^T(distr)  1
02471     // u2) C(local) = A  (distr) * B^X(distr)  2
02472     // u3) C(distr) = A^X(local) * B^X(local)  4
02473     // u4) C(distr) = A^X(local) * B^X(distr)  4
02474     // u5) C(distr) = A^T(distr) * B^X(local)  2
02475     // u6) C(local) = A^X(distr) * B^X(local)  4
02476     // u7) C(distr) = A^X(distr) * B^X(local)  4
02477     // u8) C(local) = A^X(local) * B^X(distr)  4
02478     //
02479     // Total of 32 case (2^5).
02480 
02481     const char errPrefix[] = "Tpetra::MultiVector::multiply(transOpA,transOpB,alpha,A,B,beta): ";
02482 
02483     TEUCHOS_TEST_FOR_EXCEPTION( ScalarTraits<Scalar>::isComplex && (transA == TRANS || transB == TRANS), std::invalid_argument,
02484         errPrefix << "non-conjugate transpose not supported for complex types.");
02485     transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02486     transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02487 
02488     // Compute effective dimensions, w.r.t. transpose operations on
02489     size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength();
02490     size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors();
02491     size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength();
02492     size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors();
02493 
02494     Scalar beta_local = beta; // local copy of beta; might be reassigned below
02495 
02496     TEUCHOS_TEST_FOR_EXCEPTION(
02497       getLocalLength() != A_nrows || getNumVectors() != B_ncols || A_ncols != B_nrows,
02498       std::runtime_error,
02499       errPrefix << "dimension of *this, op(A) and op(B) must be consistent.  "
02500       << std::endl << "The local part of *this is "
02501       << getLocalLength() << " x " << getNumVectors()
02502       << ", A is " << A_nrows << " x " << A_ncols
02503       << ", and B is " << B_nrows << " x " << B_ncols << ".");
02504 
02505     bool A_is_local = !A.isDistributed();
02506     bool B_is_local = !B.isDistributed();
02507     bool C_is_local = !this->isDistributed();
02508     bool Case1 = ( C_is_local &&  A_is_local &&  B_is_local);                                           // Case 1: C(local) = A^X(local) * B^X(local)
02509     bool Case2 = ( C_is_local && !A_is_local && !B_is_local && transA==CONJ_TRANS && transB==NO_TRANS); // Case 2: C(local) = A^T(distr) * B  (distr)
02510     bool Case3 = (!C_is_local && !A_is_local &&  B_is_local && transA==NO_TRANS  );                     // Case 3: C(distr) = A  (distr) * B^X(local)
02511 
02512     // Test that we are considering a meaningful cases
02513     TEUCHOS_TEST_FOR_EXCEPTION( !Case1 && !Case2 && !Case3, std::runtime_error,
02514         errPrefix << "multiplication of op(A) and op(B) into *this is not a supported use case.");
02515 
02516     if (beta != ScalarTraits<Scalar>::zero() && Case2)
02517     {
02518       // if Case2, then C is local and contributions must be summed across all nodes
02519       // however, if beta != 0, then accumulate beta*C into the sum
02520       // when summing across all nodes, we only want to accumulate this once, so
02521       // set beta == 0 on all nodes except node 0
02522       int MyPID = this->getMap()->getComm()->getRank();
02523       if (MyPID!=0) beta_local = ScalarTraits<Scalar>::zero();
02524     }
02525 
02526     // Check if A, B, C have constant stride, if not then make temp copy (strided)
02527     RCP<const MV> Atmp, Btmp;
02528     RCP<MV>       Ctmp;
02529     if (isConstantStride() == false) Ctmp = rcp (new MV (*this));
02530     else Ctmp = rcp(this,false);
02531 
02532     if (A.isConstantStride() == false) Atmp = rcp (new MV (A));
02533     else Atmp = rcp(&A,false);
02534 
02535     if (B.isConstantStride() == false) Btmp = rcp (new MV (B));
02536     else Btmp = rcp(&B,false);
02537 
02538 #ifdef HAVE_TEUCHOS_DEBUG
02539     TEUCHOS_TEST_FOR_EXCEPTION(!Ctmp->isConstantStride() || !Btmp->isConstantStride() || !Atmp->isConstantStride(), std::logic_error,
02540         errPrefix << "failed making temporary strided copies of input multivectors.");
02541 #endif
02542 
02543     KMV &C_mv = Ctmp->lclMV_;
02544     {
02545       // get local multivectors
02546       const KMV &A_mv = Atmp->lclMV_;
02547       const KMV &B_mv = Btmp->lclMV_;
02548       // do the multiply (GEMM)
02549       MVT::GEMM(C_mv,transA,transB,alpha,A_mv,B_mv,beta_local);
02550     }
02551 
02552     // Dispose of (possibly) extra copies of A, B
02553     Atmp = null;
02554     Btmp = null;
02555 
02556     RCP<Node> node = MVT::getNode(lclMV_);
02557     // If *this was not strided, copy the data from the strided version and then delete it
02558     if (! isConstantStride ()) {
02559       // *this is not strided, we must put data from Ctmp into *this
02560       TEUCHOS_TEST_FOR_EXCEPT(&C_mv != &lclMV_);
02561       const size_t numVecs = MVT::getNumCols(lclMV_);
02562       for (size_t j=0; j < numVecs; ++j) {
02563         KOKKOS_NODE_TRACE("MultiVector::multiply()")
02564         node->template copyBuffers<Scalar>(getLocalLength(),MVT::getValues(C_mv,j),MVT::getValuesNonConst(lclMV_,whichVectors_[j]));
02565       }
02566     }
02567 
02568     // If Case 2 then sum up *this and distribute it to all processors.
02569     if (Case2) {
02570       this->reduce();
02571     }
02572   }
02573 
02574   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02575   void
02576   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02577   elementWiseMultiply (Scalar scalarAB,
02578                        const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02579                        const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02580                        Scalar scalarThis)
02581   {
02582     using Teuchos::arcp_const_cast;
02583     const char tfecfFuncName[] = "elementWiseMultiply()";
02584 
02585 #ifdef HAVE_TPETRA_DEBUG
02586     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02587       getLocalLength() != A.getLocalLength() ||
02588       getLocalLength() != B.getLocalLength(), std::runtime_error,
02589       ": MultiVectors do not have the same local length.");
02590 #endif // HAVE_TPETRA_DEBUG
02591     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02592       B.getNumVectors() != this->getNumVectors(), std::runtime_error,
02593       ": MultiVectors 'this' and B must have the same number of vectors.");
02594     try {
02595       MVT::ElemMult (lclMV_, scalarThis, scalarAB, (const KMV&) A.lclMV_,
02596                      (const KMV&) B.lclMV_);
02597     }
02598     catch (std::runtime_error &e) {
02599       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
02600           ": caught exception from Kokkos:" << std::endl
02601           << e.what() << std::endl);
02602     }
02603   }
02604 
02605   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02606   void
02607   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::reduce()
02608   {
02609     using Teuchos::Array;
02610     using Teuchos::ArrayView;
02611     using Teuchos::ArrayRCP;
02612     using Teuchos::as;
02613     using Teuchos::RCP;
02614     using Teuchos::reduceAll;
02615     using Teuchos::REDUCE_SUM;
02616 
02617     // This method should be called only for locally replicated
02618     // MultiVectors (! isDistributed ()).
02619     TEUCHOS_TEST_FOR_EXCEPTION(this->isDistributed (), std::runtime_error,
02620       "Tpetra::MultiVector::reduce() should only be called with locally "
02621       "replicated or otherwise not distributed MultiVector objects.");
02622     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02623     if (comm->getSize() == 1) return;
02624     RCP<Node> node = MVT::getNode(lclMV_);
02625     // sum the data across all multivectors
02626     // need to have separate packed buffers for send and receive
02627     // if we're packed, we'll just set the receive buffer as our data, the send as a copy
02628     // if we're not packed, we'll use allocated buffers for both.
02629     ArrayView<Scalar> target;
02630     const size_t myStride = MVT::getStride(lclMV_),
02631                  numCols  = MVT::getNumCols(lclMV_),
02632                  myLen    = MVT::getNumRows(lclMV_);
02633     Array<Scalar> sourceBuffer(numCols*myLen), tmparr(0);
02634     bool packed = isConstantStride() && (myStride == myLen);
02635     KOKKOS_NODE_TRACE("MultiVector::reduce()")
02636     ArrayRCP<Scalar> bufView = node->template viewBufferNonConst<Scalar>(
02637                                           KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,
02638                                           MVT::getValuesNonConst(lclMV_) );
02639     if (packed) {
02640       // copy data from view to sourceBuffer, reduce into view below
02641       target = bufView(0,myLen*numCols);
02642       std::copy(target.begin(),target.end(),sourceBuffer.begin());
02643     }
02644     else {
02645       // copy data into sourceBuffer, reduce from sourceBuffer into tmparr below, copy back to view after that
02646       tmparr.resize(myLen*numCols);
02647       Scalar *sptr = sourceBuffer.getRawPtr();
02648       ArrayRCP<const Scalar> vptr = bufView;
02649       for (size_t j=0; j<numCols; ++j) {
02650         std::copy(vptr,vptr+myLen,sptr);
02651         sptr += myLen;
02652         vptr += myStride;
02653       }
02654       target = tmparr();
02655     }
02656     // reduce
02657     reduceAll<int,Scalar> (*comm, REDUCE_SUM, as<int> (numCols*myLen),
02658                            sourceBuffer.getRawPtr (), target.getRawPtr ());
02659     if (! packed) {
02660       // copy tmparr back into view
02661       const Scalar *sptr = tmparr.getRawPtr();
02662       ArrayRCP<Scalar> vptr = bufView;
02663       for (size_t j=0; j<numCols; ++j) {
02664         std::copy (sptr, sptr+myLen, vptr);
02665         sptr += myLen;
02666         vptr += myStride;
02667       }
02668     }
02669     bufView = Teuchos::null;
02670   }
02671 
02672 
02673   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02674   void
02675   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02676   replaceLocalValue (LocalOrdinal MyRow,
02677                      size_t VectorIndex,
02678                      const Scalar &ScalarValue)
02679   {
02680 #ifdef HAVE_TPETRA_DEBUG
02681     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02682     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02683     TEUCHOS_TEST_FOR_EXCEPTION(
02684       MyRow < minLocalIndex || MyRow > maxLocalIndex,
02685       std::runtime_error,
02686       "Tpetra::MultiVector::replaceLocalValue: row index " << MyRow
02687       << " is invalid.  The range of valid row indices on this process "
02688       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
02689       << ", " << maxLocalIndex << "].");
02690     TEUCHOS_TEST_FOR_EXCEPTION(
02691       vectorIndexOutOfRange(VectorIndex),
02692       std::runtime_error,
02693       "Tpetra::MultiVector::replaceLocalValue: vector index " << VectorIndex
02694       << " of the multivector is invalid.");
02695 #endif
02696     const size_t colInd = isConstantStride () ?
02697       VectorIndex : whichVectors_[VectorIndex];
02698     lclMV_.replaceLocalValue (Teuchos::as<size_t> (MyRow), colInd, ScalarValue);
02699   }
02700 
02701 
02702   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02703   void
02704   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02705   sumIntoLocalValue (LocalOrdinal MyRow,
02706                      size_t VectorIndex,
02707                      const Scalar &ScalarValue)
02708   {
02709 #ifdef HAVE_TPETRA_DEBUG
02710     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02711     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02712     TEUCHOS_TEST_FOR_EXCEPTION(
02713       MyRow < minLocalIndex || MyRow > maxLocalIndex,
02714       std::runtime_error,
02715       "Tpetra::MultiVector::sumIntoLocalValue: row index " << MyRow
02716       << " is invalid.  The range of valid row indices on this process "
02717       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
02718       << ", " << maxLocalIndex << "].");
02719     TEUCHOS_TEST_FOR_EXCEPTION(
02720       vectorIndexOutOfRange(VectorIndex),
02721       std::runtime_error,
02722       "Tpetra::MultiVector::sumIntoLocalValue: vector index " << VectorIndex
02723       << " of the multivector is invalid.");
02724 #endif
02725     const size_t colInd = isConstantStride () ?
02726       VectorIndex : whichVectors_[VectorIndex];
02727     lclMV_.sumIntoLocalValue (Teuchos::as<size_t> (MyRow), colInd, ScalarValue);
02728   }
02729 
02730 
02731   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02732   void
02733   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02734   replaceGlobalValue (GlobalOrdinal GlobalRow,
02735                       size_t VectorIndex,
02736                       const Scalar &ScalarValue)
02737   {
02738     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02739 #ifdef HAVE_TPETRA_DEBUG
02740     TEUCHOS_TEST_FOR_EXCEPTION(
02741       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
02742       std::runtime_error,
02743       "Tpetra::MultiVector::replaceGlobalValue: global row index " << GlobalRow
02744       << "is not present on this process " << this->getMap()->getComm()->getRank()
02745       << ".");
02746     TEUCHOS_TEST_FOR_EXCEPTION(
02747       vectorIndexOutOfRange(VectorIndex),
02748       std::runtime_error,
02749       "Tpetra::MultiVector::replaceGlobalValue: vector index " << VectorIndex
02750       << " of the multivector is invalid.");
02751 #endif
02752     replaceLocalValue (MyRow, VectorIndex, ScalarValue);
02753   }
02754 
02755   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02756   void
02757   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02758   sumIntoGlobalValue (GlobalOrdinal GlobalRow,
02759                       size_t VectorIndex,
02760                       const Scalar &ScalarValue)
02761   {
02762     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02763 #ifdef HAVE_TEUCHOS_DEBUG
02764     TEUCHOS_TEST_FOR_EXCEPTION(
02765       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
02766       std::runtime_error,
02767       "Tpetra::MultiVector::sumIntoGlobalValue: global row index " << GlobalRow
02768       << "is not present on this process " << this->getMap()->getComm()->getRank()
02769       << ".");
02770     TEUCHOS_TEST_FOR_EXCEPTION(
02771       vectorIndexOutOfRange(VectorIndex),
02772       std::runtime_error,
02773       "Tpetra::MultiVector::sumIntoGlobalValue: vector index " << VectorIndex
02774       << " of the multivector is invalid.");
02775 #endif
02776     sumIntoLocalValue (MyRow, VectorIndex, ScalarValue);
02777   }
02778 
02779   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02780   template <class T>
02781   Teuchos::ArrayRCP<T>
02782   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02783   getSubArrayRCP (Teuchos::ArrayRCP<T> arr,
02784                   size_t j) const
02785   {
02786     const size_t stride = MVT::getStride (lclMV_);
02787     const size_t myLen = getLocalLength ();
02788     Teuchos::ArrayRCP<T> ret;
02789     if (isConstantStride()) {
02790       ret = arr.persistingView (j*stride, myLen);
02791     }
02792     else {
02793       ret = arr.persistingView (whichVectors_[j]*stride, myLen);
02794     }
02795     return ret;
02796   }
02797 
02798   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02799   const KokkosClassic::MultiVector<Scalar,Node>&
02800   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMV() const {
02801     return lclMV_;
02802   }
02803 
02804   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02805   KokkosClassic::MultiVector<Scalar,Node>&
02806   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMVNonConst() {
02807     return lclMV_;
02808   }
02809 
02810   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02811   std::string
02812   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const
02813   {
02814     using std::endl;
02815     std::ostringstream oss;
02816     oss << Teuchos::typeName (*this) << " {" << endl
02817         << "  label: \"" << this->getObjectLabel () << "\"" << endl
02818         << "  numRows: " << getGlobalLength () << endl
02819         << "  numCols: " << getNumVectors () << endl
02820         << "  isConstantStride: " << isConstantStride () << endl;
02821     if (isConstantStride ()) {
02822       oss << "  columnStride: " << getStride () << endl;
02823     }
02824     oss << "}" << endl;
02825     return oss.str();
02826   }
02827 
02828   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02829   void
02830   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02831   describe (Teuchos::FancyOStream &out,
02832             const Teuchos::EVerbosityLevel verbLevel) const
02833   {
02834     using Teuchos::ArrayRCP;
02835     using Teuchos::RCP;
02836     using Teuchos::VERB_DEFAULT;
02837     using Teuchos::VERB_NONE;
02838     using Teuchos::VERB_LOW;
02839     using Teuchos::VERB_MEDIUM;
02840     using Teuchos::VERB_HIGH;
02841     using Teuchos::VERB_EXTREME;
02842     using std::endl;
02843     using std::setw;
02844 
02845     // Set default verbosity if applicable.
02846     const Teuchos::EVerbosityLevel vl =
02847       (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
02848 
02849     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02850     const int myImageID = comm->getRank();
02851     const int numImages = comm->getSize();
02852 
02853     if (vl != VERB_NONE) {
02854       // Don't set the tab level unless we're printing something.
02855       Teuchos::OSTab tab (out);
02856 
02857       if (myImageID == 0) { // >= VERB_LOW prints description()
02858         out << this->description() << endl;
02859       }
02860       for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
02861         if (myImageID == imageCtr) {
02862           if (vl != VERB_LOW) {
02863             // At verbosity > VERB_LOW, each process prints something.
02864             out << "Process " << myImageID << ":" << endl;
02865 
02866             Teuchos::OSTab procTab (out);
02867             // >= VERB_MEDIUM: print the local vector length.
02868             out << "local length=" << getLocalLength();
02869             if (vl != VERB_MEDIUM) {
02870               // >= VERB_HIGH: print isConstantStride() and getStride()
02871               if (isConstantStride()) {
02872                 out << ", constant stride=" << getStride() << endl;
02873               }
02874               else {
02875                 out << ", not constant stride" << endl;
02876               }
02877               if (vl == VERB_EXTREME) {
02878                 // VERB_EXTREME: print all the values in the multivector.
02879                 out << "Values:" << endl;
02880                 ArrayRCP<ArrayRCP<const Scalar> > X = this->get2dView();
02881                 for (size_t i = 0; i < getLocalLength(); ++i) {
02882                   for (size_t j = 0; j < getNumVectors(); ++j) {
02883                     out << X[j][i];
02884                     if (j < getNumVectors() - 1) {
02885                       out << " ";
02886                     }
02887                   } // for each column
02888                   out << endl;
02889                 } // for each row
02890               } // if vl == VERB_EXTREME
02891             } // if (vl != VERB_MEDIUM)
02892             else { // vl == VERB_LOW
02893               out << endl;
02894             }
02895           } // if vl != VERB_LOW
02896         } // if it is my process' turn to print
02897         comm->barrier();
02898       } // for each process in the communicator
02899     } // if vl != VERB_NONE
02900   }
02901 
02902 #if TPETRA_USE_KOKKOS_DISTOBJECT
02903 
02904   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02905   void
02906   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02907   createViews() const
02908   {
02909     // Do nothing in Kokkos::View implementation
02910   }
02911 
02912   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02913   void
02914   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02915   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
02916   {
02917     // Do nothing in Kokkos::View implementation
02918   }
02919 
02920   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02921   void
02922   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
02923   {
02924     // Do nothing in Kokkos::View implementation
02925   }
02926 
02927 #else
02928 
02929   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02930   void
02931   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02932   createViews() const
02933   {
02934     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
02935     if (cview_.is_null () && getLocalLength () > 0) {
02936       Teuchos::ArrayRCP<const Scalar> buff = MVT::getValues (lclMV_);
02937       KOKKOS_NODE_TRACE("MultiVector::createViews()")
02938       cview_ = node->template viewBuffer<Scalar> (buff.size (), buff);
02939     }
02940   }
02941 
02942   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02943   void
02944   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02945   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
02946   {
02947     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
02948     if (ncview_.is_null () && getLocalLength () > 0) {
02949       Teuchos::ArrayRCP<Scalar> buff = MVT::getValuesNonConst (lclMV_);
02950       KOKKOS_NODE_TRACE("MultiVector::createViewsNonConst()")
02951       ncview_ = node->template viewBufferNonConst<Scalar> (rwo, buff.size (), buff);
02952     }
02953   }
02954 
02955   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02956   void
02957   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
02958   {
02959     const int constViewCount = cview_.total_count ();
02960     const int nonconstViewCount = ncview_.total_count ();
02961 
02962     // This prevents unused variable compiler warnings, in case Tpetra
02963     // efficiency warnings aren't enabled.
02964     (void) constViewCount;
02965     (void) nonconstViewCount;
02966 
02967     // Release the views.
02968     cview_ = Teuchos::null;
02969     ncview_ = Teuchos::null;
02970   }
02971 
02972 #endif
02973 
02974   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02975   void
02976   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02977   removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap)
02978   {
02979     replaceMap (newMap);
02980   }
02981 
02982   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02983   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>
02984   createCopy (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node >& src) {
02985     return src;
02986   }
02987 
02988   template <class DS, class DL, class DG, class DN, class SS, class SL, class SG, class SN>
02989   void
02990   deep_copy (MultiVector<DS,DL,DG,DN>& dst, const MultiVector<SS,SL,SG,SN>& src)
02991   {
02992     dst = src;
02993   }
02994 } // namespace Tpetra
02995 
02996 // Include KokkosRefactor partial specialisation if enabled
02997 #if defined(TPETRA_HAVE_KOKKOS_REFACTOR)
02998 #include "Tpetra_KokkosRefactor_MultiVector_def.hpp"
02999 #endif
03000 
03001 //
03002 // Explicit instantiation macro
03003 //
03004 // Must be expanded from within the Tpetra namespace!
03005 //
03006 
03007 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
03008   \
03009   template class MultiVector< SCALAR , LO , GO , NODE >; \
03010   template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
03011   template void deep_copy (MultiVector< SCALAR , LO , GO , NODE >& dst, const MultiVector< SCALAR , LO , GO , NODE >& src); \
03012 
03013 
03014 #endif // TPETRA_MULTIVECTOR_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines