Tpetra Matrix/Vector Services Version of the Day
Tpetra_MultiVector_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
00043 #define TPETRA_MULTIVECTOR_DEF_HPP
00044 
00045 #include <Kokkos_DefaultArithmetic.hpp>
00046 #include <Tpetra_Util.hpp>
00047 #include <Tpetra_Vector.hpp>
00048 
00049 #if TPETRA_USE_KOKKOS_DISTOBJECT
00050 #  include <Tpetra_Details_MultiVectorDistObjectKernels.hpp>
00051 #endif
00052 
00053 #ifdef DOXYGEN_USE_ONLY
00054 #  include "Tpetra_MultiVector_decl.hpp"
00055 #endif
00056 
00057 namespace Tpetra {
00058 
00059   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00060   bool
00061   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00062   vectorIndexOutOfRange(size_t VectorIndex) const {
00063     return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
00064   }
00065 
00066   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00067   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00068   MultiVector () :
00069     DO (Teuchos::rcp (new Map<LocalOrdinal, GlobalOrdinal, Node> ())),
00070     lclMV_ (this->getMap ()->getNode ()),
00071     hasViewSemantics_ (false) // for now, not for long though
00072   {}
00073 
00074   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00075   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00076   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00077                size_t NumVectors,
00078                bool zeroOut) : /* default is true */
00079     DO (map),
00080     lclMV_ (map->getNode ()),
00081     hasViewSemantics_ (false) // for now, not for long though
00082   {
00083     using Teuchos::ArrayRCP;
00084     using Teuchos::RCP;
00085 
00086     TEUCHOS_TEST_FOR_EXCEPTION(NumVectors < 1, std::invalid_argument,
00087       "Tpetra::MultiVector::MultiVector(): NumVectors must be strictly positive.");
00088     const size_t myLen = getLocalLength();
00089     if (myLen > 0) {
00090       RCP<Node> node = map->getNode();
00091       // On host-type Kokkos Nodes, allocBuffer() just calls the
00092       // one-argument version of arcp to allocate memory.  This should
00093       // not fill the memory by default, otherwise we would lose the
00094       // first-touch allocation optimization.
00095       ArrayRCP<Scalar> data = node->template allocBuffer<Scalar>(myLen*NumVectors);
00096       MVT::initializeValues(lclMV_,myLen,NumVectors,data,myLen);
00097       if (zeroOut) {
00098         // MVT uses the Kokkos Node's parallel_for in this case, for
00099         // first-touch allocation (across rows).
00100         MVT::Init(lclMV_, Teuchos::ScalarTraits<Scalar>::zero());
00101       }
00102     }
00103     else {
00104       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00105     }
00106   }
00107 
00108   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00109   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00110   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source) :
00111     DO (source),
00112     lclMV_ (MVT::getNode (source.lclMV_)),
00113     hasViewSemantics_ (source.hasViewSemantics_)
00114   {
00115     if (source.hasViewSemantics_) {
00116       // Shallow copy.  The const_cast is ugly but does no harm:
00117       // ArrayRCP is a pointer, so there's no reason why a method that
00118       // returns it can't be marked const.  (If you're not changing
00119       // the pointer itself, e.g., by reallocating, the pointer is
00120       // const.  It doesn't matter whether or not you can change the
00121       // values to which the pointer points.)
00122       MVT::initializeValues (lclMV_, source.lclMV_.getNumRows (),
00123                              source.lclMV_.getNumCols (),
00124                              const_cast<KMV&> (source.lclMV_).getValuesNonConst (),
00125                              source.lclMV_.getStride (),
00126                              source.lclMV_.getOrigNumRows (),
00127                              source.lclMV_.getOrigNumCols ());
00128       // Don't forget to copy whichVectors_ (for non-constant-stride view).
00129       if (source.whichVectors_.size () > 0) {
00130         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
00131       }
00132     }
00133     else {
00134       using Teuchos::ArrayRCP;
00135       using Teuchos::RCP;
00136 
00137       // copy data from the source MultiVector into this multivector
00138       RCP<Node> node = MVT::getNode(source.lclMV_);
00139       const LocalOrdinal myLen = source.getLocalLength();
00140       const size_t numVecs = source.getNumVectors();
00141 
00142       // On host-type Kokkos Nodes, allocBuffer() just calls the
00143       // one-argument version of arcp to allocate memory.  This should
00144       // not fill the memory by default, otherwise we would lose the
00145       // first-touch allocation optimization.
00146       ArrayRCP<Scalar> data = (myLen > 0) ?
00147         node->template allocBuffer<Scalar> (myLen * numVecs) :
00148         Teuchos::null;
00149       const size_t stride = (myLen > 0) ? myLen : size_t (0);
00150 
00151       // This just sets the dimensions, pointer, and stride of lclMV_.
00152       MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00153       // This actually copies the data.  It uses the Node's
00154       // parallel_for to copy, which should ensure first-touch
00155       // allocation on systems that support it.
00156       if (source.isConstantStride ()) {
00157         MVT::Assign (lclMV_, source.lclMV_);
00158       }
00159       else {
00160         MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00161       }
00162     }
00163   }
00164 
00165 
00166   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00167   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00168   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source,
00169                const Teuchos::DataAccess copyOrView) :
00170     DO (source),
00171     lclMV_ (MVT::getNode (source.lclMV_)),
00172     hasViewSemantics_ (copyOrView == Teuchos::View)
00173   {
00174     if (copyOrView == Teuchos::View) {
00175       // Shallow copy.  The const_cast is ugly but does no harm:
00176       // ArrayRCP is a pointer, so there's no reason why a method that
00177       // returns it can't be marked const.  (If you're not changing
00178       // the pointer itself, e.g., by reallocating, the pointer is
00179       // const.  It doesn't matter whether or not you can change the
00180       // values to which the pointer points.)
00181       MVT::initializeValues (lclMV_, source.lclMV_.getNumRows (),
00182                              source.lclMV_.getNumCols (),
00183                              const_cast<KMV&> (source.lclMV_).getValuesNonConst (),
00184                              source.lclMV_.getStride (),
00185                              source.lclMV_.getOrigNumRows (),
00186                              source.lclMV_.getOrigNumCols ());
00187       // Don't forget to copy whichVectors_ (for non-constant-stride view).
00188       if (source.whichVectors_.size () > 0) {
00189         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
00190       }
00191     }
00192     else if (copyOrView == Teuchos::Copy) {
00193       using Teuchos::ArrayRCP;
00194       using Teuchos::RCP;
00195 
00196       // copy data from the source MultiVector into this multivector
00197       RCP<Node> node = MVT::getNode(source.lclMV_);
00198       const LocalOrdinal myLen = source.getLocalLength();
00199       const size_t numVecs = source.getNumVectors();
00200 
00201       ArrayRCP<Scalar> data = (myLen > 0) ?
00202         node->template allocBuffer<Scalar> (myLen * numVecs) :
00203         Teuchos::null;
00204       const size_t stride = (myLen > 0) ? myLen : size_t (0);
00205 
00206       // This just sets the dimensions, pointer, and stride of lclMV_.
00207       MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00208       // This actually copies the data.  It uses the Node's
00209       // parallel_for to copy, which should ensure first-touch
00210       // allocation on systems that support it.
00211       if (source.isConstantStride ()) {
00212         MVT::Assign (lclMV_, source.lclMV_);
00213       }
00214       else {
00215         MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00216       }
00217     } else {
00218       TEUCHOS_TEST_FOR_EXCEPTION(
00219         true, std::invalid_argument, "Tpetra::MultiVector copy constructor: "
00220         "The second argument 'copyOrView' has an invalid value " << copyOrView
00221         << ".  Valid values include Teuchos::Copy = " << Teuchos::Copy <<
00222         " and Teuchos::View = " << Teuchos::View << ".");
00223     }
00224   }
00225 
00226 
00227   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00228   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00229   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00230                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00231                EPrivateComputeViewConstructor /* dummy */) :
00232     DO (map),
00233     lclMV_ (localMultiVector),
00234     hasViewSemantics_ (false) // for now, not for long though
00235   {
00236     const size_t localNumElts = map->getNodeNumElements ();
00237     TEUCHOS_TEST_FOR_EXCEPTION(
00238       localMultiVector.getNumRows () < localNumElts,
00239       std::invalid_argument,
00240       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00241       << " on process " << map->getComm ()->getRank () << ", but the given "
00242       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00243       << " rows.");
00244   }
00245 
00246   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00247   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00248   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00249                const Teuchos::ArrayRCP<Scalar>& view,
00250                size_t LDA,
00251                size_t NumVectors,
00252                EPrivateHostViewConstructor /* dummy */) :
00253     DO (map),
00254     lclMV_ (map->getNode ()),
00255     hasViewSemantics_ (false) // for now, not for long though
00256   {
00257     using Teuchos::ArrayRCP;
00258     using Teuchos::RCP;
00259 
00260     const char tfecfFuncName[] = "MultiVector(view,LDA,NumVector)";
00261     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00262       ": NumVectors must be strictly positive, but you specified NumVectors = "
00263       << NumVectors << ".");
00264     const size_t myLen = getLocalLength();
00265     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument,
00266       ": LDA must be large enough to accomodate the local entries.");
00267     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00268       static_cast<size_t> (view.size ()) < LDA*(NumVectors - 1) + myLen,
00269       std::invalid_argument,
00270       ": view does not contain enough data to specify the entries in this.");
00271     MVT::initializeValues (lclMV_, myLen, NumVectors, view, myLen);
00272   }
00273 
00274   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00275   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00276   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00277                Teuchos::ArrayRCP<Scalar> data,
00278                size_t LDA,
00279                size_t NumVectors,
00280                EPrivateComputeViewConstructor /* dummy */) :
00281     DO (map),
00282     lclMV_ (map->getNode ()),
00283     hasViewSemantics_ (false) // for now, not for long though
00284   {
00285     using Teuchos::ArrayRCP;
00286     using Teuchos::RCP;
00287 
00288     const char tfecfFuncName[] = "MultiVector(data,LDA,NumVector)";
00289     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00290       ": NumVectors must be strictly positive.");
00291     const size_t myLen = getLocalLength();
00292 #ifdef HAVE_TPETRA_DEBUG
00293     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00294       static_cast<size_t> (data.size ()) < LDA*(NumVectors - 1) + myLen,
00295       std::runtime_error,
00296       ": data does not contain enough data to specify the entries in this.");
00297 #endif
00298     MVT::initializeValues(lclMV_,myLen,NumVectors,data,LDA);
00299   }
00300 
00301   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00302   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00303   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00304                Teuchos::ArrayRCP<Scalar> data,
00305                size_t LDA,
00306                Teuchos::ArrayView<const size_t> WhichVectors,
00307                EPrivateComputeViewConstructor /* dummy */) :
00308     DO (map),
00309     lclMV_ (map->getNode ()),
00310     whichVectors_ (WhichVectors),
00311     hasViewSemantics_ (false) // for now, not for long though
00312   {
00313     using Teuchos::ArrayRCP;
00314     using Teuchos::RCP;
00315 
00316     const char tfecfFuncName[] = "MultiVector(data,LDA,WhichVectors)";
00317     const size_t myLen = getLocalLength();
00318     size_t maxVector = *std::max_element(WhichVectors.begin(), WhichVectors.end());
00319     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00320       ": LDA must be large enough to accomodate the local entries.");
00321 #ifdef HAVE_TPETRA_DEBUG
00322     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00323       static_cast<size_t> (data.size ()) < LDA * maxVector + myLen, std::runtime_error,
00324       ": data does not contain enough data to specify the entries in this.");
00325 #endif
00326     if (WhichVectors.size () == 1) {
00327       // shift data so that desired vector is vector 0
00328       maxVector = 0;
00329       data += LDA * WhichVectors[0];
00330       // kill whichVectors_; we are constant stride
00331       whichVectors_.clear ();
00332     }
00333     if (myLen > 0) {
00334       MVT::initializeValues(lclMV_,myLen,maxVector+1,data,LDA);
00335     }
00336     else {
00337       MVT::initializeValues(lclMV_,0,WhichVectors.size(),Teuchos::null,0);
00338     }
00339   }
00340 
00341   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00342   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00343   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00344                const Teuchos::ArrayRCP<Scalar>& data,
00345                const size_t LDA,
00346                const size_t numVecs) :
00347     DO (map),
00348     lclMV_ (map->getNode ()),
00349     hasViewSemantics_ (false) // for now, not for long though
00350   {
00351     const char tfecfFuncName[] = "MultiVector(map,data,LDA,numVecs)";
00352     const size_t numRows = this->getLocalLength ();
00353     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < numRows, std::runtime_error,
00354       ": LDA = " << LDA << " < numRows = " << numRows << ".");
00355     MVT::initializeValues (lclMV_, numRows, numVecs, data, LDA);
00356   }
00357 
00358   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00359   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00360   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00361                const KokkosClassic::MultiVector<Scalar,Node>& localMultiVector,
00362                Teuchos::ArrayView<const size_t> WhichVectors,
00363                EPrivateComputeViewConstructor /* dummy */) :
00364     DO (map),
00365     lclMV_ (localMultiVector),
00366     whichVectors_ (WhichVectors),
00367     hasViewSemantics_ (false) // for now, not for long though
00368   {
00369     const size_t localNumElts = map->getNodeNumElements ();
00370     TEUCHOS_TEST_FOR_EXCEPTION(
00371       localMultiVector.getNumRows () < localNumElts,
00372       std::invalid_argument,
00373       "Tpetra::MultiVector constructor: The Map contains " << localNumElts
00374       << " on process " << map->getComm ()->getRank () << ", but the given "
00375       "KokkosClassic::MultiVector only has " << localMultiVector.getNumRows ()
00376       << " rows.");
00377     TEUCHOS_TEST_FOR_EXCEPTION(
00378       WhichVectors.size () == 0,
00379       std::invalid_argument,
00380       "Tpetra::MultiVector constructor: WhichVectors.size() == 0.  "
00381       "The noncontiguous column view constructor only makes sense for a "
00382       "nonzero number of columns to view.");
00383 
00384     // Optimization for the case of a single column: just make a
00385     // contiguous ("constant stride") view of the one column.
00386     if (WhichVectors.size () == 1) {
00387       const size_t offsetRow = 0;
00388       const size_t offsetCol = WhichVectors[0];
00389       lclMV_ = lclMV_.offsetViewNonConst (localNumElts, 1, offsetRow, offsetCol);
00390       whichVectors_.clear (); // This being empty defines "constant stride."
00391     }
00392   }
00393 
00394 
00395   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00396   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00397   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00398                const Teuchos::ArrayView<const Scalar>& A,
00399                size_t LDA,
00400                size_t NumVectors) :
00401     DO (map),
00402     lclMV_ (map->getNode ()),
00403     hasViewSemantics_ (false) // for now, not for long though
00404   {
00405     using Teuchos::ArrayRCP;
00406     using Teuchos::ArrayView;
00407     using Teuchos::RCP;
00408 
00409     const char tfecfFuncName[] = "MultiVector(A,LDA)";
00410     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00411       ": NumVectors must be strictly positive.");
00412     const size_t myLen = getLocalLength();
00413     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00414       ": LDA must be large enough to accomodate the local entries.");
00415 #ifdef HAVE_TPETRA_DEBUG
00416     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00417       static_cast<size_t> (A.size ()) < LDA*(NumVectors - 1) + myLen,
00418       std::runtime_error,
00419       ": A does not contain enough data to specify the entries in this.");
00420 #endif
00421     if (myLen > 0) {
00422       RCP<Node> node = MVT::getNode(lclMV_);
00423       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00424       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00425       // FIXME (mfh 13 Sep 2012) It would be better to use the Kokkos
00426       // Node's copyToBuffer method to push data directly to the
00427       // device pointer, rather than using an intermediate host
00428       // buffer.  Also, we should have an optimization for the
00429       // contiguous storage case (constant stride, and the stride
00430       // equals the local number of rows).
00431       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00432       typename ArrayView<const Scalar>::iterator srcit = A.begin();
00433       for (size_t j = 0; j < NumVectors; ++j) {
00434         std::copy(srcit,srcit+myLen,myview);
00435         srcit += LDA;
00436         myview += myLen;
00437       }
00438       mydata = Teuchos::null;
00439       myview = Teuchos::null;
00440     }
00441     else {
00442       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00443     }
00444   }
00445 
00446   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00447   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00448   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00449                const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs,
00450                size_t NumVectors) :
00451     DO (map),
00452     lclMV_ (map->getNode ()),
00453     hasViewSemantics_ (false) // for now, not for long though
00454   {
00455     using Teuchos::ArrayRCP;
00456     using Teuchos::ArrayView;
00457     using Teuchos::RCP;
00458 
00459     const char tfecfFuncName[] = "MultiVector(ArrayOfPtrs)";
00460     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00461       NumVectors < 1 || NumVectors != static_cast<size_t>(ArrayOfPtrs.size()),
00462       std::runtime_error,
00463       ": ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs.");
00464     const size_t myLen = getLocalLength();
00465     if (myLen > 0) {
00466       RCP<Node> node = MVT::getNode(lclMV_);
00467       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00468       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00469       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::WriteOnly,myLen*NumVectors,mydata);
00470       for (size_t j = 0; j < NumVectors; ++j) {
00471 #ifdef HAVE_TPETRA_DEBUG
00472         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00473           static_cast<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(),
00474           std::runtime_error,
00475           ": ArrayOfPtrs[" << j << "].size() (== " << ArrayOfPtrs[j].size()
00476           << ") is not equal to getLocalLength() (== " << getLocalLength());
00477 #endif
00478         typename ArrayView<const Scalar>::iterator src = ArrayOfPtrs[j].begin();
00479         std::copy(src,src+myLen,myview);
00480         myview += myLen;
00481       }
00482       myview = Teuchos::null;
00483       mydata = Teuchos::null;
00484     }
00485     else {
00486       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00487     }
00488   }
00489 
00490   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00491   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~MultiVector() {
00492   }
00493 
00494 
00495   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00496   bool MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isConstantStride() const {
00497     return whichVectors_.empty();
00498   }
00499 
00500 
00501   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00502   size_t
00503   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalLength() const
00504   {
00505     if (this->getMap ().is_null ()) { // possible, due to replaceMap().
00506       return static_cast<size_t> (0);
00507     } else {
00508       return this->getMap ()->getNodeNumElements ();
00509     }
00510   }
00511 
00512 
00513   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00514   global_size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalLength() const
00515   {
00516     if (this->getMap ().is_null ()) { // possible, due to replaceMap().
00517       return static_cast<size_t> (0);
00518     } else {
00519       return this->getMap ()->getGlobalNumElements ();
00520     }
00521   }
00522 
00523 
00524   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00525   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getStride() const {
00526     if (isConstantStride()) {
00527       return MVT::getStride(lclMV_);
00528     }
00529     return 0;
00530   }
00531 
00532 
00533   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00534   bool
00535   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00536   checkSizes (const SrcDistObject& sourceObj)
00537   {
00538     // Check whether the source object is a MultiVector.  If not, then
00539     // we can't even compare sizes, so it's definitely not OK to
00540     // Import or Export from it.
00541     typedef MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> this_type;
00542     const this_type* src = dynamic_cast<const this_type*> (&sourceObj);
00543     if (src == NULL) {
00544       return false;
00545     } else {
00546       // The target of the Import or Export calls checkSizes() in
00547       // DistObject::doTransfer().  By that point, we've already
00548       // constructed an Import or Export object using the two
00549       // multivectors' Maps, which means that (hopefully) we've
00550       // already checked other attributes of the multivectors.  Thus,
00551       // all we need to do here is check the number of columns.
00552       return src->getNumVectors () == this->getNumVectors ();
00553     }
00554   }
00555 
00556   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00557   size_t
00558   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00559   constantNumberOfPackets () const {
00560     return this->getNumVectors ();
00561   }
00562 
00563 #if TPETRA_USE_KOKKOS_DISTOBJECT
00564 
00565   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00566   void
00567   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00568   copyAndPermute (
00569     const SrcDistObject& sourceObj,
00570     size_t numSameIDs,
00571     const Kokkos::View<const LocalOrdinal*, device_type> &permuteToLIDs,
00572     const Kokkos::View<const LocalOrdinal*, device_type> &permuteFromLIDs)
00573   {
00574     using Teuchos::ArrayRCP;
00575     using Teuchos::ArrayView;
00576     using Teuchos::RCP;
00577     using Kokkos::Compat::getKokkosViewDeepCopy;
00578     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00579     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00580     const char tfecfFuncName[] = "copyAndPermute";
00581 
00582     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00583       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00584       ": permuteToLIDs and permuteFromLIDs must have the same size."
00585       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00586       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00587 
00588     // We've already called checkSizes(), so this cast must succeed.
00589     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00590 
00591     const size_t numCols = this->getNumVectors ();
00592     const size_t stride = MVT::getStride (lclMV_);
00593 
00594     // Copy rows [0, numSameIDs-1] of the local multivectors.
00595     //
00596     // For GPU Nodes: All of this happens using device pointers; this
00597     // does not require host views of either source or destination.
00598     if (isConstantStride ()) {
00599       const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00600       const size_t numDestCols = MVT::getNumCols (lclMV_);
00601 
00602       const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00603       KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00604       if (sourceMV.isConstantStride ()) {
00605         MVT::Assign (dest, src);
00606       }
00607       else {
00608         MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00609       }
00610     }
00611     else {
00612       // Copy the columns one at a time, since MVT doesn't have an
00613       // Assign method for noncontiguous access to the destination
00614       // multivector.
00615       for (size_t j = 0; j < numCols; ++j) {
00616         const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00617         const size_t srcCol = sourceMV.isConstantStride () ?
00618           j : sourceMV.whichVectors_[j];
00619         KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00620         const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00621         MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00622       }
00623     }
00624 
00625     // For the remaining GIDs, execute the permutations.  This may
00626     // involve noncontiguous access of both source and destination
00627     // vectors, depending on the LID lists.
00628     //
00629     // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
00630     // the same process, this merges their values by replacement of
00631     // the last encountered GID, not by the specified merge rule
00632     // (such as ADD).
00633 
00634     // If there are no permutations, we are done
00635     if (permuteFromLIDs.size() == 0 || permuteToLIDs.size() == 0)
00636       return;
00637 
00638     if (this->isConstantStride ()) {
00639       Details::PermuteArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> perm;
00640       perm.permuteToLIDs = permuteToLIDs;
00641       perm.permuteFromLIDs = permuteFromLIDs;
00642       perm.src = sourceMV.getKokkosView();
00643       perm.dest = getKokkosViewNonConst();
00644       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00645       perm.dest_stride = stride;
00646       perm.numCols = numCols;
00647 
00648       perm.permute();
00649     }
00650     else {
00651       Details::PermuteArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> perm;
00652       perm.permuteToLIDs = permuteToLIDs;
00653       perm.permuteFromLIDs = permuteFromLIDs;
00654       perm.src = sourceMV.getKokkosView();
00655       perm.dest = getKokkosViewNonConst();
00656       perm.src_whichVectors =
00657         getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00658       perm.dest_whichVectors =
00659         getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00660       perm.src_stride = MVT::getStride (sourceMV.lclMV_);
00661       perm.dest_stride = stride;
00662       perm.numCols = numCols;
00663 
00664       perm.permute();
00665     }
00666   }
00667 
00668   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00669   void
00670   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00671   packAndPrepare (
00672     const SrcDistObject& sourceObj,
00673     const Kokkos::View<const LocalOrdinal*, device_type> &exportLIDs,
00674     Kokkos::View<Scalar*, device_type> &exports,
00675     const Kokkos::View<size_t*, device_type> &numExportPacketsPerLID,
00676     size_t& constantNumPackets,
00677     Distributor & /* distor */ )
00678   {
00679     using Teuchos::Array;
00680     using Teuchos::ArrayView;
00681     using Kokkos::Compat::getKokkosViewDeepCopy;
00682     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00683     typedef Array<size_t>::size_type size_type;
00684 
00685     // If we have no exports, there is nothing to do
00686     if (exportLIDs.size() == 0)
00687       return;
00688 
00689     // We've already called checkSizes(), so this cast must succeed.
00690     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00691 
00692     // We don't need numExportPacketsPerLID; forestall "unused
00693     // variable" compile warnings.
00694     (void) numExportPacketsPerLID;
00695 
00696     /* The layout in the export for MultiVectors is as follows:
00697        exports = { all of the data from row exportLIDs.front() ;
00698                    ....
00699                    all of the data from row exportLIDs.back() }
00700       This doesn't have the best locality, but is necessary because
00701       the data for a Packet (all data associated with an LID) is
00702       required to be contiguous. */
00703 
00704     const size_t numCols = sourceMV.getNumVectors ();
00705     const size_t stride = MVT::getStride (sourceMV.lclMV_);
00706     // This spares us from needing to fill numExportPacketsPerLID.
00707     // Setting constantNumPackets to a nonzero value signals that
00708     // all packets have the same number of entries.
00709     constantNumPackets = numCols;
00710 
00711     const size_t numExportLIDs = exportLIDs.size ();
00712     const size_t newExportsSize = numCols * numExportLIDs;
00713     if (exports.size () != newExportsSize) {
00714       Kokkos::Compat::realloc(exports, newExportsSize);
00715     }
00716 
00717     if (numCols == 1) { // special case for one column only
00718       // MultiVector always represents a single column with constant
00719       // stride, but it doesn't hurt to implement both cases anyway.
00720       if (sourceMV.isConstantStride ()) {
00721         Details::PackArraySingleColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00722         pack.exportLIDs = exportLIDs;
00723         pack.exports = exports;
00724         pack.src = sourceMV.getKokkosView();
00725         pack.pack();
00726       }
00727       else {
00728         Details::PackArraySingleColumnOffset<Scalar,LocalOrdinal,device_type> pack;
00729         pack.exportLIDs = exportLIDs;
00730         pack.exports = exports;
00731         pack.src = sourceMV.getKokkosView();
00732         pack.offset = sourceMV.whichVectors_[0] * stride;
00733         pack.pack();
00734       }
00735     }
00736     else { // the source MultiVector has multiple columns
00737       if (sourceMV.isConstantStride ()) {
00738         Details::PackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,device_type> pack;
00739         pack.exportLIDs = exportLIDs;
00740         pack.exports = exports;
00741         pack.src = sourceMV.getKokkosView();
00742         pack.stride = stride;
00743         pack.numCols = numCols;
00744         pack.pack ();
00745       }
00746       else {
00747         Details::PackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,device_type> pack;
00748         pack.exportLIDs = exportLIDs;
00749         pack.exports = exports;
00750         pack.src = sourceMV.getKokkosView();
00751         pack.srcWhichVectors =
00752           getKokkosViewDeepCopy<device_type>(sourceMV.whichVectors_ ());
00753         pack.stride = stride;
00754         pack.numCols = numCols;
00755         pack.pack ();
00756       }
00757     }
00758   }
00759 
00760 
00761   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00762   void
00763   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00764   unpackAndCombine (
00765     const Kokkos::View<const LocalOrdinal*, device_type> &importLIDs,
00766     const Kokkos::View<const Scalar*, device_type> &imports,
00767     const Kokkos::View<size_t*, device_type> &numPacketsPerLID,
00768     size_t constantNumPackets,
00769     Distributor & /* distor */,
00770     CombineMode CM)
00771   {
00772     using Teuchos::ArrayView;
00773     using Kokkos::Compat::getKokkosViewDeepCopy;
00774     typedef Teuchos::ScalarTraits<Scalar> SCT;
00775     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00776     const char tfecfFuncName[] = "unpackAndCombine";
00777 
00778     // If we have no imports, there is nothing to do
00779     if (importLIDs.size() == 0)
00780       return;
00781 
00782     // We don't need numPacketsPerLID; forestall "unused variable"
00783     // compile warnings.
00784     (void) numPacketsPerLID;
00785 
00786     /* The layout in the export for MultiVectors is as follows:
00787        imports = { all of the data from row exportLIDs.front() ;
00788                    ....
00789                    all of the data from row exportLIDs.back() }
00790       This doesn't have the best locality, but is necessary because
00791       the data for a Packet (all data associated with an LID) is
00792       required to be contiguous. */
00793 
00794     const size_t numVecs = getNumVectors ();
00795 
00796 #ifdef HAVE_TPETRA_DEBUG
00797     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00798       static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
00799       std::runtime_error,
00800       ": 'imports' buffer size must be consistent with the amount of data to "
00801       "be sent.  " << std::endl << "imports.size() = " << imports.size()
00802       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
00803       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
00804       << ".");
00805 
00806     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00807       constantNumPackets == static_cast<size_t> (0), std::runtime_error,
00808       ": constantNumPackets input argument must be nonzero.");
00809 
00810     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00811       static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets),
00812       std::runtime_error, ": constantNumPackets must equal numVecs.");
00813 #endif // HAVE_TPETRA_DEBUG
00814 
00815     if (numVecs > 0 && importLIDs.size () > 0) {
00816       const size_t myStride = MVT::getStride (lclMV_);
00817 
00818       // NOTE (mfh 10 Mar 2012) If you want to implement custom
00819       // combine modes, start editing here.  Also, if you trust
00820       // inlining, it would be nice to condense this code by using a
00821       // binary function object f:
00822       //
00823       // ncview_[...] = f (ncview_[...], *impptr++);
00824       if (CM == INSERT || CM == REPLACE) {
00825         if (isConstantStride()) {
00826           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00827           unpack.importLIDs = importLIDs;
00828           unpack.imports = imports;
00829           unpack.dest = getKokkosViewNonConst();
00830           unpack.stride = myStride;
00831           unpack.numCols = numVecs;
00832           unpack.op = Details::InsertOp();
00833 
00834           unpack.unpack();
00835         }
00836         else {
00837           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::InsertOp,device_type> unpack;
00838           unpack.importLIDs = importLIDs;
00839           unpack.imports = imports;
00840           unpack.dest = getKokkosViewNonConst();
00841           unpack.whichVectors =
00842             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00843           unpack.stride = myStride;
00844           unpack.numCols = numVecs;
00845           unpack.op = Details::InsertOp();
00846 
00847           unpack.unpack();
00848         }
00849       }
00850       else if (CM == ADD) {
00851         if (isConstantStride()) {
00852           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00853           unpack.importLIDs = importLIDs;
00854           unpack.imports = imports;
00855           unpack.dest = getKokkosViewNonConst();
00856           unpack.stride = myStride;
00857           unpack.numCols = numVecs;
00858           unpack.op = Details::AddOp();
00859 
00860           unpack.unpack();
00861         }
00862         else {
00863           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AddOp,device_type> unpack;
00864           unpack.importLIDs = importLIDs;
00865           unpack.imports = imports;
00866           unpack.dest = getKokkosViewNonConst();
00867           unpack.whichVectors =
00868             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00869           unpack.stride = myStride;
00870           unpack.numCols = numVecs;
00871           unpack.op = Details::AddOp();
00872 
00873           unpack.unpack();
00874         }
00875       }
00876       else if (CM == ABSMAX) {
00877         if (isConstantStride()) {
00878           Details::UnpackArrayMultiColumnConstantStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00879           unpack.importLIDs = importLIDs;
00880           unpack.imports = imports;
00881           unpack.dest = getKokkosViewNonConst();
00882           unpack.stride = myStride;
00883           unpack.numCols = numVecs;
00884           unpack.op = Details::AbsMaxOp();
00885 
00886           unpack.unpack();
00887         }
00888         else {
00889           Details::UnpackArrayMultiColumnVariableStride<Scalar,LocalOrdinal,Details::AbsMaxOp,device_type> unpack;
00890           unpack.importLIDs = importLIDs;
00891           unpack.imports = imports;
00892           unpack.dest = getKokkosViewNonConst();
00893           unpack.whichVectors =
00894             getKokkosViewDeepCopy<device_type>(whichVectors_ ());
00895           unpack.stride = myStride;
00896           unpack.numCols = numVecs;
00897           unpack.op = Details::AbsMaxOp();
00898 
00899           unpack.unpack();
00900         }
00901       }
00902       else {
00903         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00904           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
00905           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
00906           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
00907       }
00908     }
00909   }
00910 
00911 #else
00912 
00913   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00914   void
00915   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00916   copyAndPermute (const SrcDistObject& sourceObj,
00917                   size_t numSameIDs,
00918                   const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
00919                   const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
00920   {
00921     using Teuchos::ArrayRCP;
00922     using Teuchos::ArrayView;
00923     using Teuchos::RCP;
00924     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00925     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
00926     const char tfecfFuncName[] = "copyAndPermute";
00927 
00928     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00929       permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00930       ": permuteToLIDs and permuteFromLIDs must have the same size."
00931       << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
00932       << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
00933 
00934     // We've already called checkSizes(), so this cast must succeed.
00935     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00936 
00937     const size_t numCols = this->getNumVectors ();
00938 
00939     // Copy rows [0, numSameIDs-1] of the local multivectors.
00940     //
00941     // For GPU Nodes: All of this happens using device pointers; this
00942     // does not require host views of either source or destination.
00943     //
00944     // mfh 10 Jan 2013: In the current implementation, device data has
00945     // already been copied to the host views.  If we do a
00946     // device-device copy here, it won't synch the host views
00947     // correctly.  Thus, we replace this step with a host copy, if
00948     // Node is a GPU Node.  That ensures correctness for now, though
00949     // it's really something we need to fix if we plan to run
00950     // efficiently on GPUs.
00951     if (Node::isHostNode) {
00952       if (isConstantStride ()) {
00953         const size_t numSrcCols = MVT::getNumCols (sourceMV.lclMV_);
00954         const size_t numDestCols = MVT::getNumCols (lclMV_);
00955 
00956         const KMV src = sourceMV.lclMV_.offsetView (numSameIDs, numSrcCols, 0, 0);
00957         KMV dest = lclMV_.offsetViewNonConst (numSameIDs, numDestCols, 0, 0);
00958         if (sourceMV.isConstantStride ()) {
00959           MVT::Assign (dest, src);
00960         }
00961         else {
00962           MVT::Assign (dest, src, sourceMV.whichVectors_ ());
00963         }
00964       }
00965       else {
00966         // Copy the columns one at a time, since MVT doesn't have an
00967         // Assign method for noncontiguous access to the destination
00968         // multivector.
00969         for (size_t j = 0; j < numCols; ++j) {
00970           const size_t destCol = isConstantStride () ? j : whichVectors_[j];
00971           const size_t srcCol = sourceMV.isConstantStride () ?
00972             j : sourceMV.whichVectors_[j];
00973           KMV dest_j = lclMV_.offsetViewNonConst (numSameIDs, 1, 0, destCol);
00974           const KMV src_j = sourceMV.lclMV_.offsetView (numSameIDs, 1, 0, srcCol);
00975           MVT::Assign (dest_j, src_j); // Copy src_j into dest_j
00976         }
00977       }
00978     }
00979 
00980     // Copy the vectors one at a time.  This works whether or not the
00981     // multivectors have constant stride.
00982     for (size_t j = 0; j < numCols; ++j) {
00983       // Get host views of the current source and destination column.
00984       //
00985       // FIXME (mfh 10 Mar 2012) Copying should really be done on the
00986       // device.  There's no need to bring everything back to the
00987       // host, copy there, and then copy back.
00988       ArrayView<const Scalar> srcView =
00989         sourceMV.getSubArrayRCP (sourceMV.cview_, j) ();
00990       ArrayView<Scalar> dstView = getSubArrayRCP (ncview_, j) ();
00991 
00992       if (! Node::isHostNode) {
00993         // The first numSameIDs IDs are the same between source and
00994         // target, so we can just copy the data.  (This favors faster
00995         // contiguous access whenever we can do it.)
00996         std::copy (srcView.begin (), srcView.begin () + numSameIDs,
00997                    dstView.begin ());
00998       }
00999 
01000       // For the remaining GIDs, execute the permutations.  This may
01001       // involve noncontiguous access of both source and destination
01002       // vectors, depending on the LID lists.
01003       //
01004       // FIXME (mfh 09 Apr 2012) Permutation should be done on the
01005       // device, not on the host.
01006       //
01007       // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
01008       // the same process, this merges their values by replacement of
01009       // the last encountered GID, not by the specified merge rule
01010       // (such as ADD).
01011       const size_type numPermuteLIDs =
01012         std::min (permuteToLIDs.size (), permuteFromLIDs.size ());
01013       for (size_type k = 0; k < numPermuteLIDs; ++k) {
01014         dstView[permuteToLIDs[k]] = srcView[permuteFromLIDs[k]];
01015       }
01016     }
01017   }
01018 
01019   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01020   void
01021   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01022   packAndPrepare (const SrcDistObject& sourceObj,
01023                   const ArrayView<const LocalOrdinal> &exportLIDs,
01024                   Array<Scalar> &exports,
01025                   const ArrayView<size_t> &numExportPacketsPerLID,
01026                   size_t& constantNumPackets,
01027                   Distributor & /* distor */ )
01028   {
01029     using Teuchos::Array;
01030     using Teuchos::ArrayView;
01031     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01032     typedef Array<size_t>::size_type size_type;
01033 
01034     // We've already called checkSizes(), so this cast must succeed.
01035     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
01036 
01037     // We don't need numExportPacketsPerLID; forestall "unused
01038     // variable" compile warnings.
01039     (void) numExportPacketsPerLID;
01040 
01041     /* The layout in the export for MultiVectors is as follows:
01042        exports = { all of the data from row exportLIDs.front() ;
01043                    ....
01044                    all of the data from row exportLIDs.back() }
01045       This doesn't have the best locality, but is necessary because
01046       the data for a Packet (all data associated with an LID) is
01047       required to be contiguous. */
01048 
01049     const size_t numCols = sourceMV.getNumVectors ();
01050     const size_t stride = MVT::getStride (sourceMV.lclMV_);
01051     // This spares us from needing to fill numExportPacketsPerLID.
01052     // Setting constantNumPackets to a nonzero value signals that
01053     // all packets have the same number of entries.
01054     constantNumPackets = numCols;
01055 
01056     const size_type numExportLIDs = exportLIDs.size ();
01057     const size_type newExportsSize = numCols * numExportLIDs;
01058     if (exports.size () != newExportsSize) {
01059       exports.resize (newExportsSize);
01060     }
01061 
01062     ArrayView<const Scalar> srcView = sourceMV.cview_ ();
01063     if (numCols == 1) { // special case for one column only
01064       // MultiVector always represents a single column with constant
01065       // stride, but it doesn't hurt to implement both cases anyway.
01066       if (sourceMV.isConstantStride ()) {
01067         for (size_type k = 0; k < numExportLIDs; ++k) {
01068           exports[k] = srcView[exportLIDs[k]];
01069         }
01070       }
01071       else {
01072         const size_t offset = sourceMV.whichVectors_[0] * stride;
01073         for (size_type k = 0; k < numExportLIDs; ++k) {
01074           exports[k] = srcView[exportLIDs[k] + offset];
01075         }
01076       }
01077     }
01078     else { // the source MultiVector has multiple columns
01079       typename Array<Scalar>::iterator expptr = exports.begin ();
01080       if (sourceMV.isConstantStride ()) {
01081         for (size_type k = 0; k < numExportLIDs; ++k) {
01082           const size_t localRow = static_cast<size_t> (exportLIDs[k]);
01083           for (size_t j = 0; j < numCols; ++j) {
01084             *expptr++ = srcView[localRow + j*stride];
01085           }
01086         }
01087       }
01088       else {
01089         ArrayView<const size_t> srcWhichVectors = sourceMV.whichVectors_ ();
01090         for (size_type k = 0; k < numExportLIDs; ++k) {
01091           const size_t localRow = static_cast<size_t> (exportLIDs[k]);
01092           for (size_t j = 0; j < numCols; ++j) {
01093             *expptr++ = srcView[localRow + srcWhichVectors[j]*stride];
01094           }
01095         }
01096       }
01097     }
01098   }
01099 
01100 
01101   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01102   void
01103   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01104   unpackAndCombine (const ArrayView<const LocalOrdinal> &importLIDs,
01105                     const ArrayView<const Scalar> &imports,
01106                     const ArrayView<size_t> &numPacketsPerLID,
01107                     size_t constantNumPackets,
01108                     Distributor & /* distor */,
01109                     CombineMode CM)
01110   {
01111     using Teuchos::ArrayView;
01112     typedef Teuchos::ScalarTraits<Scalar> SCT;
01113     typedef typename ArrayView<const LocalOrdinal>::size_type size_type;
01114     const char tfecfFuncName[] = "unpackAndCombine";
01115 
01116     // We don't need numPacketsPerLID; forestall "unused variable"
01117     // compile warnings.
01118     (void) numPacketsPerLID;
01119 
01120     /* The layout in the export for MultiVectors is as follows:
01121        imports = { all of the data from row exportLIDs.front() ;
01122                    ....
01123                    all of the data from row exportLIDs.back() }
01124       This doesn't have the best locality, but is necessary because
01125       the data for a Packet (all data associated with an LID) is
01126       required to be contiguous. */
01127 
01128     const size_t numVecs = getNumVectors ();
01129 
01130 #ifdef HAVE_TPETRA_DEBUG
01131     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01132       static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
01133       std::runtime_error,
01134       ": 'imports' buffer size must be consistent with the amount of data to "
01135       "be sent.  " << std::endl << "imports.size() = " << imports.size()
01136       << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
01137       << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
01138       << ".");
01139 
01140     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01141       constantNumPackets == static_cast<size_t> (0), std::runtime_error,
01142       ": constantNumPackets input argument must be nonzero.");
01143 
01144     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01145       static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets),
01146       std::runtime_error, ": constantNumPackets must equal numVecs.");
01147 #endif // HAVE_TPETRA_DEBUG
01148 
01149     if (numVecs > 0 && importLIDs.size () > 0) {
01150       typename ArrayView<const Scalar>::iterator impptr = imports.begin ();
01151       ArrayView<Scalar> destView = ncview_ ();
01152       const size_type numImportLIDs = importLIDs.size ();
01153       const size_t myStride = MVT::getStride (lclMV_);
01154 
01155       // NOTE (mfh 10 Mar 2012) If you want to implement custom
01156       // combine modes, start editing here.  Also, if you trust
01157       // inlining, it would be nice to condense this code by using a
01158       // binary function object f:
01159       //
01160       // ncview_[...] = f (ncview_[...], *impptr++);
01161       if (CM == INSERT || CM == REPLACE) {
01162         if (isConstantStride()) {
01163           for (size_type k = 0; k < numImportLIDs; ++k) {
01164             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01165             for (size_t j = 0; j < numVecs; ++j) {
01166               destView[localRow + myStride*j] = *impptr++;
01167             }
01168           }
01169         }
01170         else {
01171           for (size_type k = 0; k < numImportLIDs; ++k) {
01172             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01173             for (size_t j = 0; j < numVecs; ++j) {
01174               destView[localRow + myStride*whichVectors_[j]] = *impptr++;
01175             }
01176           }
01177         }
01178       }
01179       else if (CM == ADD) {
01180         if (isConstantStride()) {
01181           for (size_type k = 0; k < numImportLIDs; ++k) {
01182             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01183             for (size_t j = 0; j < numVecs; ++j) {
01184               destView[localRow + myStride*j] += *impptr++;
01185             }
01186           }
01187         }
01188         else {
01189           for (size_type k = 0; k < numImportLIDs; ++k) {
01190             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01191             for (size_t j = 0; j < numVecs; ++j) {
01192               destView[localRow + myStride*whichVectors_[j]] += *impptr++;
01193             }
01194           }
01195         }
01196       }
01197       else if (CM == ABSMAX) {
01198         if (isConstantStride()) {
01199           for (size_type k = 0; k < numImportLIDs; ++k) {
01200             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01201             for (size_t j = 0; j < numVecs; ++j) {
01202               Scalar &curval       = destView[localRow + myStride*j];
01203               const Scalar &newval = *impptr++;
01204               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01205             }
01206           }
01207         }
01208         else {
01209           for (size_type k = 0; k < numImportLIDs; ++k) {
01210             const size_t localRow = static_cast<size_t> (importLIDs[k]);
01211             for (size_t j = 0; j < numVecs; ++j) {
01212               Scalar &curval       = destView[localRow + myStride*whichVectors_[j]];
01213               const Scalar &newval = *impptr++;
01214               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
01215             }
01216           }
01217         }
01218       }
01219       else {
01220         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01221           CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
01222           std::invalid_argument, ": Invalid CombineMode: " << CM << ".  Valid "
01223           "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
01224       }
01225     }
01226   }
01227 
01228 #endif
01229 
01230   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01231   inline size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNumVectors() const {
01232     if (isConstantStride()) {
01233       return MVT::getNumCols(lclMV_);
01234     }
01235     else {
01236       return whichVectors_.size();
01237     }
01238   }
01239 
01240 
01241   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01242   void
01243   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01244   dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01245        const Teuchos::ArrayView<Scalar> &dots) const
01246   {
01247     using Teuchos::Array;
01248     using Teuchos::ArrayRCP;
01249     using Teuchos::arcp_const_cast;
01250 
01251     const char tfecfFuncName[] = "dot()";
01252     const size_t myLen   = getLocalLength();
01253     const size_t numVecs = getNumVectors();
01254 #ifdef HAVE_TPETRA_DEBUG
01255     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01256         ": MultiVectors do not have compatible Maps:" << std::endl
01257         << "this->getMap(): " << std::endl << *this->getMap()
01258         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01259 #else
01260     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01261         ": MultiVectors do not have the same local length.");
01262 #endif
01263     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01264         ": MultiVectors must have the same number of vectors.");
01265     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
01266       static_cast<size_t> (dots.size ()) != numVecs, std::runtime_error,
01267       ": dots.size() must be as large as the number of vectors in *this and A.");
01268     if (isConstantStride() && A.isConstantStride()) {
01269       MVT::Dot(lclMV_,A.lclMV_,dots);
01270     }
01271     else {
01272       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01273       ArrayRCP<Scalar> vptr  = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01274       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01275       for (size_t j=0; j < numVecs; ++j) {
01276         ArrayRCP<Scalar> vj  =   getSubArrayRCP( vptr,j);
01277         ArrayRCP<Scalar> avj = A.getSubArrayRCP(avptr,j);
01278         MVT::initializeValues(a,myLen, 1, avj, myLen);
01279         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01280         dots[j] = MVT::Dot((const KMV&)v,(const KMV &)a);
01281       }
01282     }
01283     if (this->isDistributed()) {
01284       Array<Scalar> ldots(dots);
01285       Teuchos::reduceAll (* (this->getMap ()->getComm ()), Teuchos::REDUCE_SUM,
01286                           static_cast<int> (numVecs), ldots.getRawPtr (),
01287                           dots.getRawPtr ());
01288     }
01289   }
01290 
01291 
01292   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01293   void
01294   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01295   norm2 (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>& norms) const
01296   {
01297     using Teuchos::arcp_const_cast;
01298     using Teuchos::Array;
01299     using Teuchos::ArrayRCP;
01300     using Teuchos::ArrayView;
01301     using Teuchos::reduceAll;
01302     using Teuchos::REDUCE_SUM;
01303     using Teuchos::ScalarTraits;
01304     typedef typename ScalarTraits<Scalar>::magnitudeType MT;
01305     typedef ScalarTraits<MT> STM;
01306 
01307     const size_t numVecs = this->getNumVectors();
01308     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(norms.size()) != numVecs,
01309       std::runtime_error,
01310       "Tpetra::MultiVector::norm2(norms): norms.size() must be as large as the number of vectors in *this.");
01311     if (isConstantStride ()) {
01312       MVT::Norm2Squared (lclMV_,norms);
01313     }
01314     else {
01315       KMV v (MVT::getNode (lclMV_));
01316       ArrayRCP<Scalar> vi;
01317       for (size_t i=0; i < numVecs; ++i) {
01318         vi = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[i]));
01319         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vi, MVT::getStride (lclMV_));
01320         norms[i] = MVT::Norm2Squared (v);
01321       }
01322     }
01323     if (this->isDistributed ()) {
01324       Array<MT> lnorms (norms);
01325       // FIXME (mfh 25 Apr 2012) Somebody explain to me why we're
01326       // calling Teuchos::reduceAll when MultiVector has a perfectly
01327       // good reduce() function.
01328       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM,
01329                  static_cast<int> (numVecs), lnorms.getRawPtr (),
01330                  norms.getRawPtr ());
01331     }
01332     for (typename ArrayView<MT>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
01333       (*n) = STM::squareroot (*n);
01334     }
01335   }
01336 
01337 
01338   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01339   void
01340   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01341   normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights,
01342                 const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01343   {
01344     using Teuchos::arcp_const_cast;
01345     using Teuchos::Array;
01346     using Teuchos::ArrayRCP;
01347     using Teuchos::ArrayView;
01348     using Teuchos::reduceAll;
01349     using Teuchos::REDUCE_SUM;
01350     using Teuchos::ScalarTraits;
01351     typedef ScalarTraits<Scalar> SCT;
01352     typedef typename SCT::magnitudeType Mag;
01353 
01354     const char tfecfFuncName[] = "normWeighted()";
01355 
01356     const Mag OneOverN = ScalarTraits<Mag>::one() / getGlobalLength();
01357     bool OneW = false;
01358     const size_t numVecs = this->getNumVectors();
01359     if (weights.getNumVectors() == 1) {
01360       OneW = true;
01361     }
01362     else {
01363       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(weights.getNumVectors() != numVecs, std::runtime_error,
01364         ": MultiVector of weights must contain either one vector or the same number of vectors as this.");
01365     }
01366 #ifdef HAVE_TPETRA_DEBUG
01367     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*weights.getMap()), std::runtime_error,
01368       ": MultiVectors do not have compatible Maps:" << std::endl
01369       << "this->getMap(): " << std::endl << *this->getMap()
01370       << "weights.getMap(): " << std::endl << *weights.getMap() << std::endl);
01371 #else
01372     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != weights.getLocalLength(), std::runtime_error,
01373       ": MultiVectors do not have the same local length.");
01374 #endif
01375     const size_t myLen = getLocalLength();
01376 
01377     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(static_cast<size_t>(norms.size()) != numVecs, std::runtime_error,
01378       ": norms.size() must be as large as the number of vectors in *this.");
01379     if (isConstantStride() && weights.isConstantStride()) {
01380       MVT::WeightedNorm(lclMV_,weights.lclMV_,norms);
01381     }
01382     else {
01383       KMV v(MVT::getNode(lclMV_)), w(MVT::getNode(lclMV_));
01384       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar> (MVT::getValues (lclMV_));
01385       ArrayRCP<Scalar> wptr = arcp_const_cast<Scalar> (MVT::getValues (weights.lclMV_));
01386       ArrayRCP<Scalar> wj   = wptr.persistingView (0,myLen);
01387       MVT::initializeValues (w,myLen, 1, wj, myLen);
01388       for (size_t j=0; j < numVecs; ++j) {
01389         ArrayRCP<Scalar> vj = getSubArrayRCP (vptr, j);
01390         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01391         if (! OneW) {
01392           wj = weights.getSubArrayRCP(wptr,j);
01393           MVT::initializeValues (w, myLen, 1, wj, myLen);
01394         }
01395         norms[j] = MVT::WeightedNorm ((const KMV&)v, (const KMV &)w);
01396       }
01397     }
01398     if (this->isDistributed ()) {
01399       Array<Mag> lnorms(norms);
01400       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM,
01401                  static_cast<int> (numVecs), lnorms.getRawPtr (),
01402                  norms.getRawPtr ());
01403     }
01404     for (typename ArrayView<Mag>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
01405       *n = ScalarTraits<Mag>::squareroot(*n * OneOverN);
01406     }
01407   }
01408 
01409 
01410   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01411   void
01412   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01413   norm1 (const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01414   {
01415     using Teuchos::Array;
01416     using Teuchos::ArrayRCP;
01417     using Teuchos::arcp_const_cast;
01418     using Teuchos::reduceAll;
01419     using Teuchos::REDUCE_SUM;
01420     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
01421 
01422     const size_t numVecs = this->getNumVectors();
01423     TEUCHOS_TEST_FOR_EXCEPTION(
01424       static_cast<size_t>(norms.size()) != numVecs, std::runtime_error,
01425       "Tpetra::MultiVector::norm1(norms): norms.size() must be as large "
01426       "as the number of vectors in *this.");
01427     if (isConstantStride()) {
01428       MVT::Norm1(lclMV_,norms);
01429     }
01430     else {
01431       KMV v(MVT::getNode(lclMV_));
01432       ArrayRCP<Scalar> vj;
01433       for (size_t j=0; j < numVecs; ++j) {
01434         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_,whichVectors_[j]) );
01435         MVT::initializeValues (v, MVT::getNumRows(lclMV_), 1, vj, MVT::getStride (lclMV_));
01436         norms[j] = MVT::Norm1 ((const KMV&)v);
01437       }
01438     }
01439     if (this->isDistributed ()) {
01440       Array<Mag> lnorms (norms);
01441       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM,
01442                  static_cast<int> (numVecs), lnorms.getRawPtr (),
01443                  norms.getRawPtr ());
01444     }
01445   }
01446 
01447 
01448   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01449   void
01450   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01451   normInf (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
01452   {
01453     using Teuchos::Array;
01454     using Teuchos::ArrayRCP;
01455     using Teuchos::arcp_const_cast;
01456     using Teuchos::reduceAll;
01457     using Teuchos::REDUCE_MAX;
01458     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
01459 
01460     const size_t numVecs = this->getNumVectors();
01461     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(norms.size()) != numVecs, std::runtime_error,
01462       "Tpetra::MultiVector::normInf(norms): norms.size() must be as large as the number of vectors in *this.");
01463     if (isConstantStride ()) {
01464       MVT::NormInf(lclMV_,norms);
01465     }
01466     else {
01467       KMV v(MVT::getNode(lclMV_));
01468       ArrayRCP<Scalar> vj;
01469       for (size_t j=0; j < numVecs; ++j) {
01470         vj = Teuchos::arcp_const_cast<Scalar>( MVT::getValues(lclMV_,whichVectors_[j]) );
01471         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
01472         norms[j] = MVT::NormInf((const KMV&)v);
01473       }
01474     }
01475     if (this->isDistributed()) {
01476       Array<Mag> lnorms(norms);
01477       reduceAll (*this->getMap ()->getComm (), REDUCE_MAX,
01478                  static_cast<int> (numVecs), lnorms.getRawPtr (),
01479                  norms.getRawPtr ());
01480     }
01481   }
01482 
01483 
01484   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01485   void
01486   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01487   meanValue (const Teuchos::ArrayView<Scalar> &means) const
01488   {
01489     using Teuchos::Array;
01490     using Teuchos::ArrayRCP;
01491     using Teuchos::arcp_const_cast;
01492     using Teuchos::reduceAll;
01493     using Teuchos::REDUCE_SUM;
01494     typedef Teuchos::ScalarTraits<Scalar> SCT;
01495 
01496     const size_t numVecs = getNumVectors();
01497     const size_t myLen   = getLocalLength();
01498     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(means.size()) != numVecs, std::runtime_error,
01499       "Tpetra::MultiVector::meanValue(): means.size() must be as large as the number of vectors in *this.");
01500     // compute local components of the means
01501     // sum these across all nodes
01502     if (isConstantStride()) {
01503       MVT::Sum(lclMV_,means);
01504     }
01505     else {
01506       KMV v(MVT::getNode(lclMV_));
01507       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
01508       for (size_t j=0; j < numVecs; ++j) {
01509         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j);
01510         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01511         means[j] = MVT::Sum((const KMV &)v);
01512       }
01513     }
01514     if (this->isDistributed()) {
01515       Array<Scalar> lmeans(means);
01516       // only combine if we are a distributed MV
01517       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM,
01518                  static_cast<int> (numVecs), lmeans.getRawPtr (),
01519                  means.getRawPtr ());
01520     }
01521     // mfh 12 Apr 2012: Don't take out the cast from the ordinal type
01522     // to the magnitude type, since operator/ (std::complex<T>, int)
01523     // isn't necessarily defined.
01524     const Scalar OneOverN =
01525       SCT::one() / static_cast<typename SCT::magnitudeType> (getGlobalLength ());
01526     for (typename ArrayView<Scalar>::iterator i = means.begin();
01527          i != means.begin() + numVecs;
01528          ++i)
01529     {
01530       (*i) = (*i) * OneOverN;
01531     }
01532   }
01533 
01534 
01535   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01536   void
01537   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01538   randomize()
01539   {
01540     if (isConstantStride ()) {
01541       MVT::Random (lclMV_);
01542     }
01543     else {
01544       const size_t numVecs = this->getNumVectors ();
01545       KMV v (MVT::getNode (lclMV_));
01546       Teuchos::ArrayRCP<Scalar> vj;
01547       for (size_t j = 0; j < numVecs; ++j) {
01548         vj = MVT::getValuesNonConst (lclMV_, whichVectors_[j]);
01549         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01550         MVT::Random (v);
01551       }
01552     }
01553   }
01554 
01555 
01556   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01557   void
01558   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01559   putScalar (const Scalar &alpha)
01560   {
01561     const size_t numVecs = getNumVectors();
01562     if (isConstantStride()) {
01563       MVT::Init(lclMV_,alpha);
01564     }
01565     else {
01566       KMV v(MVT::getNode(lclMV_));
01567       Teuchos::ArrayRCP<Scalar> vj;
01568       for (size_t j=0; j < numVecs; ++j) {
01569         vj = MVT::getValuesNonConst(lclMV_,whichVectors_[j]);
01570         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
01571         MVT::Init(v,alpha);
01572       }
01573     }
01574   }
01575 
01576 
01577   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01578   void
01579   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01580   replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& newMap)
01581   {
01582     using Teuchos::ArrayRCP;
01583     using Teuchos::Comm;
01584     using Teuchos::RCP;
01585 
01586     // mfh 28 Mar 2013: This method doesn't forget whichVectors_, so
01587     // it might work if the MV is a column view of another MV.
01588     // However, things might go wrong when restoring the original
01589     // Map, so we don't allow this case for now.
01590     TEUCHOS_TEST_FOR_EXCEPTION(
01591       ! this->isConstantStride (), std::logic_error,
01592       "Tpetra::MultiVector::replaceMap: This method does not currently work "
01593       "if the MultiVector is a column view of another MultiVector (that is, if "
01594       "isConstantStride() == false).");
01595 
01596     // Case 1: current Map and new Map are both nonnull on this process.
01597     // Case 2: current Map is nonnull, new Map is null.
01598     // Case 3: current Map is null, new Map is nonnull.
01599     // Case 4: both Maps are null: forbidden.
01600     //
01601     // Case 1 means that we don't have to do anything on this process,
01602     // other than assign the new Map.  (We always have to do that.)
01603     // It's an error for the user to supply a Map that requires
01604     // resizing in this case.
01605     //
01606     // Case 2 means that the calling process is in the current Map's
01607     // communicator, but will be excluded from the new Map's
01608     // communicator.  We don't have to do anything on the calling
01609     // process; just leave whatever data it may have alone.
01610     //
01611     // Case 3 means that the calling process is excluded from the
01612     // current Map's communicator, but will be included in the new
01613     // Map's communicator.  This means we need to (re)allocate the
01614     // local (KokkosClassic::)MultiVector if it does not have the right
01615     // number of rows.  If the new number of rows is nonzero, we'll
01616     // fill the newly allocated local data with zeros, as befits a
01617     // projection operation.
01618     //
01619     // The typical use case for Case 3 is that the MultiVector was
01620     // first created with the Map with more processes, then that Map
01621     // was replaced with a Map with fewer processes, and finally the
01622     // original Map was restored on this call to replaceMap.
01623 
01624 #ifdef HAVE_TEUCHOS_DEBUG
01625     // mfh 28 Mar 2013: We can't check for compatibility across the
01626     // whole communicator, unless we know that the current and new
01627     // Maps are nonnull on _all_ participating processes.
01628     // TEUCHOS_TEST_FOR_EXCEPTION(
01629     //   origNumProcs == newNumProcs && ! this->getMap ()->isCompatible (*map),
01630     //   std::invalid_argument, "Tpetra::MultiVector::project: "
01631     //   "If the input Map's communicator is compatible (has the same number of "
01632     //   "processes as) the current Map's communicator, then the two Maps must be "
01633     //   "compatible.  The replaceMap() method is not for data redistribution; "
01634     //   "use Import or Export for that purpose.");
01635 
01636     // TODO (mfh 28 Mar 2013) Add compatibility checks for projections
01637     // of the Map, in case the process counts don't match.
01638 #endif // HAVE_TEUCHOS_DEBUG
01639 
01640     if (this->getMap ().is_null ()) { // current Map is null
01641       TEUCHOS_TEST_FOR_EXCEPTION(
01642         newMap.is_null (), std::invalid_argument,
01643         "Tpetra::MultiVector::replaceMap: both current and new Maps are null.  "
01644         "This probably means that the input Map is incorrect.");
01645       // Case 3: current Map is null, new Map is nonnull.
01646       const size_t newNumRows = newMap->getNodeNumElements ();
01647       const size_t origNumRows = lclMV_.getNumRows ();
01648       const size_t numCols = getNumVectors ();
01649 
01650       if (origNumRows != newNumRows) {
01651         RCP<Node> node = newMap->getNode ();
01652         ArrayRCP<Scalar> data = newNumRows == 0 ? Teuchos::null :
01653           node->template allocBuffer<Scalar> (newNumRows * numCols);
01654         const size_t stride = newNumRows;
01655         MVT::initializeValues (lclMV_, newNumRows, numCols, data, stride);
01656         if (newNumRows > 0) {
01657           MVT::Init (lclMV_, Teuchos::ScalarTraits<Scalar>::zero ());
01658         }
01659       }
01660     }
01661     else if (newMap.is_null ()) { // Case 2: current Map is nonnull, new Map is null
01662       // I am an excluded process.  Reinitialize my data so that I
01663       // have 0 rows.  Keep the number of columns as before.
01664       const size_t numVecs = getNumVectors ();
01665       MVT::initializeValues (lclMV_, 0, numVecs, Teuchos::null, 0);
01666     }
01667     this->map_ = newMap;
01668   }
01669 
01670 
01671   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01672   void
01673   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01674   scale (const Scalar &alpha)
01675   {
01676     using Teuchos::arcp_const_cast;
01677     using Teuchos::ArrayRCP;
01678 
01679     // NOTE: can't substitute putScalar(0.0) for scale(0.0), because
01680     //       the former will overwrite NaNs present in the MultiVector, while the
01681     //       semantics of this call require multiplying them by 0, which IEEE requires to be NaN
01682     const size_t numVecs = getNumVectors();
01683     if (alpha == Teuchos::ScalarTraits<Scalar>::one()) {
01684       // do nothing
01685     }
01686     else if (isConstantStride()) {
01687       MVT::Scale(lclMV_,alpha);
01688     }
01689     else {
01690       KMV v(MVT::getNode(lclMV_));
01691       ArrayRCP<Scalar> vj;
01692       for (size_t j=0; j < numVecs; ++j) {
01693         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_, whichVectors_[j]));
01694         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01695         MVT::Scale (v, alpha);
01696       }
01697     }
01698   }
01699 
01700 
01701   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01702   void
01703   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01704   scale (Teuchos::ArrayView<const Scalar> alphas)
01705   {
01706     using Teuchos::arcp_const_cast;
01707     using Teuchos::ArrayRCP;
01708 
01709     const size_t numVecs = this->getNumVectors();
01710     TEUCHOS_TEST_FOR_EXCEPTION(static_cast<size_t>(alphas.size()) != numVecs, std::runtime_error,
01711       "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as the number of vectors in *this.");
01712 
01713     // Unfortunately, MVT doesn't have a Scale method that works on a
01714     // whole MultiVector.  Thus, we have to scale a column at a time.
01715     if (this->isConstantStride ()) {
01716       for (size_t j = 0; j < numVecs; ++j) {
01717         const Scalar alpha_j = alphas[j];
01718         const size_t curCol = j;
01719 
01720         // If alphas[j] is zero, don't scale; just fill with zeros.
01721         // This follows the BLAS convention.  (zero*NaN is NaN.)
01722         //
01723         // Similarly, if alphas[j] is one, don't scale; just do nothing.
01724         // This follows the BLAS convention.  (one*NaN is NaN.)
01725         if (alpha_j == Teuchos::ScalarTraits<Scalar>::zero ()) {
01726           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01727           MVT::Init (MV_j, alpha_j);
01728         }
01729         else if (alpha_j != Teuchos::ScalarTraits<Scalar>::one ()) {
01730           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01731           MVT::Scale (MV_j, alpha_j);
01732         }
01733       }
01734     } else { // not constant stride
01735       for (size_t j = 0; j < numVecs; ++j) {
01736         const Scalar alpha_j = alphas[j];
01737         const size_t curCol = whichVectors_[j];
01738 
01739         // If alphas[j] is zero, don't scale; just fill with zeros.
01740         // This follows the BLAS convention.  (zero*NaN is NaN.)
01741         //
01742         // Similarly, if alphas[j] is one, don't scale; just do nothing.
01743         // This follows the BLAS convention.  (one*NaN is NaN.)
01744         if (alpha_j == Teuchos::ScalarTraits<Scalar>::zero ()) {
01745           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01746           MVT::Init (MV_j, alpha_j);
01747         }
01748         else if (alpha_j != Teuchos::ScalarTraits<Scalar>::one ()) {
01749           KMV MV_j = lclMV_.offsetViewNonConst (lclMV_.getNumRows (), 1, 0, curCol);
01750           MVT::Scale (MV_j, alpha_j);
01751         }
01752       }
01753     }
01754 
01755     // KMV vec(MVT::getNode(lclMV_));
01756     // const size_t myLen = MVT::getNumRows(lclMV_);
01757     // if (myLen == 0) return;
01758     // ArrayRCP<Scalar> mybuf = MVT::getValuesNonConst(lclMV_);
01759     // for (size_t j = 0; j < numVecs; ++j) {
01760     //   if (alphas[j] == Teuchos::ScalarTraits<Scalar>::one()) {
01761     //     // do nothing: NaN * 1.0 == NaN, Number*1.0 == Number
01762     //   }
01763     //   else {
01764     //     ArrayRCP<Scalar> mybufj = getSubArrayRCP(mybuf,j);
01765     //     MVT::initializeValues(vec,myLen,1,mybufj,myLen);
01766     //     MVT::Scale(vec,alphas[j]);
01767     //   }
01768     // }
01769   }
01770 
01771 
01772   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01773   void
01774   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01775   scale (const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01776   {
01777     using Teuchos::arcp_const_cast;
01778     using Teuchos::ArrayRCP;
01779 
01780     const char tfecfFuncName[] = "scale(alpha,A)";
01781 
01782     const size_t numVecs = getNumVectors(),
01783                  myLen   = getLocalLength();
01784 #ifdef HAVE_TPETRA_DEBUG
01785     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! this->getMap ()->isCompatible (*A.getMap ()),
01786       std::runtime_error, ": MultiVectors do not have compatible Maps:" << std::endl
01787         << "this->getMap(): " << std::endl << *(this->getMap ())
01788         << "A.getMap(): " << std::endl << *(A.getMap ()) << std::endl);
01789 #else
01790     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01791       ": MultiVectors do not have the same local length.");
01792 #endif
01793     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01794       ": MultiVectors must have the same number of vectors.");
01795     if (isConstantStride() && A.isConstantStride()) {
01796       // set me == alpha*A
01797       MVT::Scale(lclMV_,alpha,(const KMV&)A.lclMV_);
01798     }
01799     else {
01800       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01801       ArrayRCP<Scalar> vptr  = MVT::getValuesNonConst (lclMV_);
01802       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar> (MVT::getValues (A.lclMV_));
01803       for (size_t j=0; j < numVecs; ++j) {
01804         ArrayRCP<Scalar>  vj =   getSubArrayRCP (vptr,  j);
01805         ArrayRCP<Scalar> avj = A.getSubArrayRCP (avptr, j);
01806         MVT::initializeValues (a,myLen, 1, avj, myLen);
01807         MVT::initializeValues (v,myLen, 1,  vj, myLen);
01808         MVT::Scale (v, alpha, (const KMV &)a);
01809       }
01810     }
01811   }
01812 
01813 
01814   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01815   void
01816   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01817   reciprocal (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01818   {
01819     using Teuchos::arcp_const_cast;
01820     using Teuchos::ArrayRCP;
01821 
01822     const char tfecfFuncName[] = "reciprocal()";
01823 
01824 #ifdef HAVE_TPETRA_DEBUG
01825     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01826         ": MultiVectors do not have compatible Maps:" << std::endl
01827         << "this->getMap(): " << std::endl << *this->getMap()
01828         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01829 #else
01830     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01831         ": MultiVectors do not have the same local length.");
01832 #endif
01833     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01834         ": MultiVectors must have the same number of vectors.");
01835     const size_t numVecs = getNumVectors();
01836     const size_t myLen = getLocalLength();
01837     try {
01838       if (isConstantStride() && A.isConstantStride()) {
01839         MVT::Recip(lclMV_,(const KMV&)A.lclMV_);
01840       }
01841       else {
01842         KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01843         ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01844                         avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01845         for (size_t j=0; j < numVecs; ++j) {
01846           ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01847                           avj = A.getSubArrayRCP(avptr,j);
01848           MVT::initializeValues(a,myLen, 1, avj, myLen);
01849           MVT::initializeValues(v,myLen, 1,  vj, myLen);
01850           MVT::Recip(v,(const KMV &)a);
01851         }
01852       }
01853     }
01854     catch (std::runtime_error &e) {
01855       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
01856           ": caught exception from Kokkos:" << std::endl
01857           << e.what() << std::endl);
01858     }
01859   }
01860 
01861 
01862   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01863   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::abs(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A) {
01864     using Teuchos::arcp_const_cast;
01865     using Teuchos::ArrayRCP;
01866 
01867     const char tfecfFuncName[] = "abs()";
01868 #ifdef HAVE_TPETRA_DEBUG
01869     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01870         ": MultiVectors do not have compatible Maps:" << std::endl
01871         << "this->getMap(): " << std::endl << *this->getMap()
01872         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01873 #else
01874     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01875         ": MultiVectors do not have the same local length.");
01876 #endif
01877     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01878         ": MultiVectors must have the same number of vectors.");
01879     const size_t myLen = getLocalLength();
01880     const size_t numVecs = getNumVectors();
01881     if (isConstantStride() && A.isConstantStride()) {
01882       MVT::Abs(lclMV_,(const KMV&)A.lclMV_);
01883     }
01884     else {
01885       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01886       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01887                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01888       for (size_t j=0; j < numVecs; ++j) {
01889         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01890                         avj = A.getSubArrayRCP(avptr,j);
01891         MVT::initializeValues(a,myLen, 1, avj, myLen);
01892         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01893         MVT::Abs(v,(const KMV &)a);
01894       }
01895     }
01896   }
01897 
01898 
01899   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01900   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01901                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01902                       const Scalar &beta)
01903   {
01904     using Teuchos::arcp_const_cast;
01905     using Teuchos::ArrayRCP;
01906 
01907     const char tfecfFuncName[] = "update()";
01908     // this = beta*this + alpha*A
01909     // must support case where &this == &A
01910     // can't short circuit on alpha==0.0 or beta==0.0, because 0.0*NaN != 0.0
01911 #ifdef HAVE_TPETRA_DEBUG
01912     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01913         ": MultiVectors do not have compatible Maps:" << std::endl
01914         << "this->getMap(): " << std::endl << this->getMap()
01915         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01916 #else
01917     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01918         ": MultiVectors do not have the same local length.");
01919 #endif
01920     const size_t myLen = getLocalLength();
01921     const size_t numVecs = getNumVectors();
01922     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01923         ": MultiVectors must have the same number of vectors.");
01924     if (isConstantStride() && A.isConstantStride()) {
01925       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta);
01926     }
01927     else {
01928       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01929       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01930                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01931       for (size_t j=0; j < numVecs; ++j) {
01932         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01933                         avj = A.getSubArrayRCP(avptr,j);
01934         MVT::initializeValues(a,myLen, 1, avj, myLen);
01935         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01936         MVT::GESUM(v,alpha,(const KMV &)a,beta);
01937       }
01938     }
01939   }
01940 
01941 
01942   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01943   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01944                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01945                       const Scalar &beta,  const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &B,
01946                       const Scalar &gamma)
01947   {
01948     using Teuchos::arcp_const_cast;
01949     using Teuchos::ArrayRCP;
01950 
01951     const char tfecfFuncName[] = "update()";
01952     // this = alpha*A + beta*B + gamma*this
01953     // must support case where &this == &A or &this == &B
01954     // can't short circuit on alpha==0.0 or beta==0.0 or gamma==0.0, because 0.0*NaN != 0.0
01955 #ifdef HAVE_TPETRA_DEBUG
01956     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()) || !this->getMap()->isCompatible(*B.getMap()),
01957         std::runtime_error,
01958         ": MultiVectors do not have compatible Maps:" << std::endl
01959         << "this->getMap(): " << std::endl << *this->getMap()
01960         << "A.getMap(): " << std::endl << *A.getMap() << std::endl
01961         << "B.getMap(): " << std::endl << *B.getMap() << std::endl);
01962 #else
01963     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength() || getLocalLength() != B.getLocalLength(), std::runtime_error,
01964         ": MultiVectors do not have the same local length.");
01965 #endif
01966     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors() || B.getNumVectors() != this->getNumVectors(), std::runtime_error,
01967         ": MultiVectors must have the same number of vectors.");
01968     const size_t myLen = getLocalLength();
01969     const size_t numVecs = getNumVectors();
01970     if (isConstantStride() && A.isConstantStride() && B.isConstantStride()) {
01971       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta,(const KMV&)B.lclMV_,gamma);
01972     }
01973     else {
01974       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_)), b(MVT::getNode(lclMV_));
01975       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01976                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_)),
01977                       bvptr = arcp_const_cast<Scalar>(MVT::getValues(B.lclMV_));
01978       for (size_t j=0; j < numVecs; ++j) {
01979         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01980                         avj = A.getSubArrayRCP(avptr,j),
01981                         bvj = B.getSubArrayRCP(bvptr,j);
01982         MVT::initializeValues(b,myLen, 1, bvj, myLen);
01983         MVT::initializeValues(a,myLen, 1, avj, myLen);
01984         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01985         MVT::GESUM(v,alpha,(const KMV&)a,beta,(const KMV&)b,gamma);
01986       }
01987     }
01988   }
01989 
01990 
01991   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01992   Teuchos::ArrayRCP<const Scalar>
01993   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01994   getData (size_t j) const
01995   {
01996     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01997     return node->template viewBuffer<Scalar> (getLocalLength (), getSubArrayRCP (MVT::getValues(lclMV_), j));
01998   }
01999 
02000 
02001   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02002   Teuchos::ArrayRCP<Scalar>
02003   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02004   getDataNonConst(size_t j)
02005   {
02006     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
02007     return node->template viewBufferNonConst<Scalar> (KokkosClassic::ReadWrite, getLocalLength (), getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j));
02008   }
02009 
02010 
02011   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02012   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>&
02013   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02014   operator= (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source)
02015   {
02016     if (source.hasViewSemantics_) {
02017       this->map_ = source.map_; // "op=" for DistObject
02018       lclMV_ = source.lclMV_; // Shallow copy
02019 
02020       // Don't forget to copy whichVectors_ (for non-constant-stride view).
02021       if (source.whichVectors_.size () > 0) {
02022         whichVectors_ = source.whichVectors_; // Array::op= does a deep copy
02023       }
02024       // View semantics are "contagious" for the "classic" version of
02025       // MultiVector.  That is, the right-hand side of the assignment
02026       // transmits view semantics to the left-hand side.  (The Kokkos
02027       // refactor version of MultiVector always has view semantics.)
02028       hasViewSemantics_ = true;
02029     }
02030     else {
02031 #ifdef HAVE_TPETRA_DEBUG
02032       using Teuchos::outArg;
02033       using Teuchos::REDUCE_MIN;
02034       using Teuchos::reduceAll;
02035       using std::endl;
02036 #endif // HAVE_TPETRA_DEBUG
02037 
02038       const char tfecfFuncName[] = "operator=";
02039       // Check for special case of this=Source, in which case we do nothing.
02040       if (this != &source) {
02041         // Whether the input and *this are compatible on the calling process.
02042         const int locallyCompat =
02043           (this->getLocalLength () == source.getLocalLength ()) ? 1 : 0;
02044 #ifdef HAVE_TPETRA_DEBUG
02045         int globallyCompat = 1; // innocent until proven guilty
02046         reduceAll<int, int> (* (this->getMap ()->getComm ()), REDUCE_MIN,
02047                              locallyCompat, outArg (globallyCompat));
02048         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02049           globallyCompat == 0, std::invalid_argument,
02050           ": MultiVectors do not have the same local length on all processes.");
02051 #else
02052         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02053           locallyCompat == 0, std::invalid_argument,
02054           ": MultiVectors do not have the same local length on the calling "
02055           "process " << this->getMap ()->getComm ()->getSize () << ".  *this "
02056           "has " << this->getLocalLength () << " local rows, and source has "
02057           << source.getLocalLength () << " rows.");
02058 #endif // HAVE_TPETRA_DEBUG
02059 
02060         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02061           source.getNumVectors() != getNumVectors(), std::invalid_argument,
02062           ": MultiVectors must have the same number of vectors.");
02063 
02064         Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02065         const size_t numVecs = getNumVectors();
02066         if (isConstantStride () && source.isConstantStride () &&
02067             getLocalLength () == getStride () &&
02068             source.getLocalLength ()== source.getStride ()) {
02069           // Both multivectors' data are stored contiguously, so we can
02070           // copy in one call.
02071           node->template copyBuffers<Scalar> (getLocalLength () * numVecs,
02072                                               MVT::getValues (source.lclMV_),
02073                                               MVT::getValuesNonConst (lclMV_));
02074         }
02075         else {
02076           // We have to copy the columns one at a time.
02077           for (size_t j=0; j < numVecs; ++j) {
02078             node->template copyBuffers<Scalar> (getLocalLength (),
02079                                                 source.getSubArrayRCP (MVT::getValues (source.lclMV_), j),
02080                                                 getSubArrayRCP (MVT::getValuesNonConst(lclMV_), j));
02081           }
02082         }
02083       }
02084     }
02085     return *this;
02086   }
02087 
02088 
02089   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02090   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02091   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02092   subCopy (const Teuchos::ArrayView<const size_t> &cols) const
02093   {
02094     using Teuchos::RCP;
02095     using Teuchos::rcp;
02096 
02097     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() < 1, std::runtime_error,
02098         "Tpetra::MultiVector::subCopy(cols): cols must contain at least one column.");
02099     size_t numCopyVecs = cols.size();
02100     const bool zeroData = false;
02101     RCP<Node> node = MVT::getNode(lclMV_);
02102     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
02103     // mv is allocated with constant stride
02104     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
02105     // copy data from *this into mv
02106     for (size_t j=0; j<numCopyVecs; ++j) {
02107       node->template copyBuffers<Scalar> (getLocalLength (),
02108                                           getSubArrayRCP (MVT::getValues (lclMV_), cols[j]),
02109                                           MVT::getValuesNonConst (mv->lclMV_, j));
02110     }
02111     return mv;
02112   }
02113 
02114 
02115   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02116   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02117   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02118   subCopy (const Teuchos::Range1D &colRng) const
02119   {
02120     using Teuchos::RCP;
02121     using Teuchos::rcp;
02122 
02123     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02124         "Tpetra::MultiVector::subCopy(Range1D): range must include at least one vector.");
02125     size_t numCopyVecs = colRng.size();
02126     const bool zeroData = false;
02127     RCP<Node> node = MVT::getNode(lclMV_);
02128     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
02129     // mv is allocated with constant stride
02130     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
02131     // copy data from *this into mv
02132     for (size_t js=colRng.lbound(), jd=0; jd<numCopyVecs; ++jd, ++js) {
02133       node->template copyBuffers<Scalar> (getLocalLength (),
02134                                           getSubArrayRCP (MVT::getValues (lclMV_), js),
02135                                           MVT::getValuesNonConst (mv->lclMV_, jd));
02136     }
02137     return mv;
02138   }
02139 
02140 
02141   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02142   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02143   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02144   offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
02145               size_t offset) const
02146   {
02147     using Teuchos::RCP;
02148     using Teuchos::rcp;
02149     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02150 
02151     const size_t newNumRows = subMap->getNodeNumElements();
02152     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02153     if (tooManyElts) {
02154       const int myRank = this->getMap ()->getComm ()->getRank ();
02155       TEUCHOS_TEST_FOR_EXCEPTION(
02156         newNumRows + offset > MVT::getNumRows (lclMV_),
02157         std::runtime_error,
02158         "Tpetra::MultiVector::offsetView: Invalid input Map.  Input Map owns "
02159         << subMap->getNodeNumElements() << " elements on process " << myRank
02160         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02161         << lclMV_.getOrigNumRows () << " on this process.");
02162     }
02163     RCP<const MV> constViewMV;
02164     if (isConstantStride()) {
02165       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02166         lclMV_.offsetView (newNumRows, lclMV_.getNumCols (), offset, 0);
02167       constViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02168     }
02169     else {
02170       // Compute the max column being viewed.  This tells us where to stop the view.
02171       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02172       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02173         lclMV_.offsetView (newNumRows, maxCol+1, offset, 0);
02174       constViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02175     }
02176     return constViewMV;
02177   }
02178 
02179 
02180   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02181   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02182   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02183   offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
02184                       size_t offset)
02185   {
02186     using Teuchos::RCP;
02187     using Teuchos::rcp;
02188     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02189 
02190     const size_t newNumRows = subMap->getNodeNumElements();
02191     const bool tooManyElts = newNumRows + offset > lclMV_.getOrigNumRows ();
02192     if (tooManyElts) {
02193       const int myRank = this->getMap ()->getComm ()->getRank ();
02194       TEUCHOS_TEST_FOR_EXCEPTION(
02195         newNumRows + offset > MVT::getNumRows (lclMV_),
02196         std::runtime_error,
02197         "Tpetra::MultiVector::offsetViewNonconst: Invalid input Map.  Input Map owns "
02198         << subMap->getNodeNumElements() << " elements on process " << myRank
02199         << ".  offset = " << offset << ".  Yet, the MultiVector contains only "
02200         << lclMV_.getOrigNumRows () << " on this process.");
02201     }
02202     RCP<MV> subViewMV;
02203     if (isConstantStride()) {
02204       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02205         lclMV_.offsetViewNonConst (newNumRows, lclMV_.getNumCols (), offset, 0);
02206       subViewMV = rcp (new MV (subMap, newLocalMV, COMPUTE_VIEW_CONSTRUCTOR));
02207     }
02208     else {
02209       // Compute the max column being viewed.  This tells us where to stop the view.
02210       const size_t maxCol = *std::max_element (whichVectors_.begin(), whichVectors_.end());
02211       KokkosClassic::MultiVector<Scalar, Node> newLocalMV =
02212         lclMV_.offsetViewNonConst (newNumRows, maxCol+1, offset, 0);
02213       subViewMV = rcp (new MV (subMap, newLocalMV, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
02214     }
02215     return subViewMV;
02216   }
02217 
02218 
02219   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02220   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02221   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02222   subView (const ArrayView<const size_t> &cols) const
02223   {
02224     using Teuchos::arcp_const_cast;
02225     using Teuchos::Array;
02226     using Teuchos::ArrayRCP;
02227     using Teuchos::RCP;
02228     using Teuchos::rcp;
02229     using Teuchos::rcp_const_cast;
02230     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02231 
02232     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
02233       "Tpetra::MultiVector::subView(ArrayView): range must include at least one vector.");
02234     // this is const, so the lclMV_ is const, so that we can only get const buffers
02235     // we will cast away the const; this is okay, because
02236     //   a) the constructor doesn't modify the data, and
02237     //   b) we are encapsulating in a const MV before returning
02238     const size_t myStride = MVT::getStride(lclMV_),
02239                  myLen    = MVT::getNumRows(lclMV_),
02240               numViewCols = cols.size();
02241     // use the smallest view possible of the buffer: from the first element of the minInd vector to the last element of the maxInd vector
02242     // this minimizes overlap between views, and keeps view of the minimum amount necessary, in order to allow node to achieve maximum efficiency.
02243     // adjust the indices appropriately; shift so that smallest index is 0
02244     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02245     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar> (cbuf);
02246     Array<size_t> newCols (numViewCols);
02247     size_t minInd = Teuchos::OrdinalTraits<size_t>::max();
02248     size_t maxInd = Teuchos::OrdinalTraits<size_t>::zero();
02249     if (isConstantStride()) {
02250       for (size_t j=0; j < numViewCols; ++j) {
02251         newCols[j] = cols[j];
02252         if (newCols[j] < minInd) minInd = newCols[j];
02253         if (maxInd < newCols[j]) maxInd = newCols[j];
02254       }
02255     }
02256     else {
02257       for (size_t j=0; j < numViewCols; ++j) {
02258         newCols[j] = whichVectors_[cols[j]];
02259         if (newCols[j] < minInd) minInd = newCols[j];
02260         if (maxInd < newCols[j]) maxInd = newCols[j];
02261       }
02262     }
02263     ArrayRCP<Scalar> minbuf = ncbuf.persistingView(minInd * myStride, myStride * (maxInd - minInd) + myLen);
02264     for (size_t j=0; j < numViewCols; ++j) {
02265       newCols[j] -= minInd;
02266     }
02267     RCP<const MV> constViewMV;
02268     return rcp_const_cast<const MV> (rcp (new MV (this->getMap (), minbuf, myStride, newCols (), COMPUTE_VIEW_CONSTRUCTOR)));
02269   }
02270 
02271 
02272   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02273   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02274   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02275   subView (const Teuchos::Range1D &colRng) const
02276   {
02277     using Teuchos::arcp_const_cast;
02278     using Teuchos::Array;
02279     using Teuchos::ArrayRCP;
02280     using Teuchos::RCP;
02281     using Teuchos::rcp;
02282     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02283 
02284     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02285       "Tpetra::MultiVector::subView(Range1D): range must include at least one vector.");
02286     size_t numViewVecs = colRng.size();
02287     // this is const, so the lclMV_ is const, so that we can only get const buffers
02288     // we will cast away the const; this is okay, because
02289     //   a) the constructor doesn't modify the data, and
02290     //   b) we are encapsulating in a const MV before returning
02291     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
02292     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar>(cbuf);
02293     // resulting MultiVector is constant stride only if *this is
02294     if (isConstantStride()) {
02295       // view goes from first entry of first vector to last entry of last vector
02296       ArrayRCP<Scalar> subdata = ncbuf.persistingView( MVT::getStride(lclMV_) * colRng.lbound(),
02297                                                        MVT::getStride(lclMV_) * (numViewVecs-1) + getLocalLength() );
02298       return rcp (new MV (this->getMap (), subdata, MVT::getStride (lclMV_), numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02299     }
02300     // otherwise, use a subset of this whichVectors_ to construct new multivector
02301     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02302     return rcp (new MV (this->getMap (), ncbuf, MVT::getStride (lclMV_), whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02303   }
02304 
02305 
02306   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02307   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02308   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02309   subViewNonConst (const ArrayView<const size_t> &cols)
02310   {
02311     using Teuchos::arcp_const_cast;
02312     using Teuchos::Array;
02313     using Teuchos::ArrayRCP;
02314     using Teuchos::RCP;
02315     using Teuchos::rcp;
02316     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02317 
02318     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
02319       "Tpetra::MultiVector::subViewNonConst(ArrayView): range must include at least one vector.");
02320     if (isConstantStride()) {
02321       return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), cols, COMPUTE_VIEW_CONSTRUCTOR));
02322     }
02323     // else, lookup current whichVectors_ using cols
02324     Array<size_t> newcols(cols.size());
02325     for (size_t j = 0; j < static_cast<size_t> (cols.size ()); ++j) {
02326       newcols[j] = whichVectors_[cols[j]];
02327     }
02328     return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), newcols (), COMPUTE_VIEW_CONSTRUCTOR));
02329   }
02330 
02331 
02332   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02333   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02334   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02335   subViewNonConst (const Teuchos::Range1D &colRng)
02336   {
02337     using Teuchos::arcp_const_cast;
02338     using Teuchos::Array;
02339     using Teuchos::ArrayRCP;
02340     using Teuchos::RCP;
02341     using Teuchos::rcp;
02342     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02343 
02344     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
02345       "Tpetra::MultiVector::subViewNonConst(Range1D): range must include at least one vector.");
02346     size_t numViewVecs = colRng.size();
02347     // resulting MultiVector is constant stride only if *this is
02348     if (isConstantStride ()) {
02349       // view goes from first entry of first vector to last entry of last vector
02350       const size_t stride = MVT::getStride(lclMV_);
02351       ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02352       ArrayRCP<Scalar> subdata = data.persistingView( stride * colRng.lbound(),
02353                                                       stride * (numViewVecs-1) + getLocalLength() );
02354       return rcp (new MV (this->getMap (), subdata, stride, numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
02355     }
02356     // otherwise, use a subset of this whichVectors_ to construct new multivector
02357     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
02358     const size_t stride = MVT::getStride(lclMV_);
02359     ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
02360     return rcp (new MV (this->getMap (), data, stride, whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
02361   }
02362 
02363 
02364   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02365   Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02366   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02367   getVector (size_t j) const
02368   {
02369     using Teuchos::arcp_const_cast;
02370     using Teuchos::ArrayRCP;
02371     using Teuchos::rcp;
02372     using Teuchos::rcp_const_cast;
02373     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02374 
02375 #ifdef HAVE_TPETRA_DEBUG
02376     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02377         "Tpetra::MultiVector::getVector(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02378 #endif
02379     // this is const, so lclMV_ is const, so we get const buff
02380     // it is safe to cast away the const because we will wrap it in a const Vector below
02381     ArrayRCP<Scalar> ncbuff;
02382     if (getLocalLength() > 0) {
02383       ArrayRCP<const Scalar> cbuff = getSubArrayRCP(MVT::getValues(lclMV_),j);
02384       ncbuff = arcp_const_cast<Scalar>(cbuff);
02385     }
02386     return rcp_const_cast<const V> (rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR)));
02387   }
02388 
02389 
02390   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02391   Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
02392   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getVectorNonConst(size_t j)
02393   {
02394     using Teuchos::ArrayRCP;
02395     using Teuchos::rcp;
02396     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
02397 
02398 #ifdef HAVE_TPETRA_DEBUG
02399     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
02400         "Tpetra::MultiVector::getVectorNonConst(j): index j (== " << j << ") exceeds valid column range for this multivector.");
02401 #endif
02402     ArrayRCP<Scalar> ncbuff;
02403     if (getLocalLength() > 0) {
02404       ncbuff = getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j);
02405     }
02406     return rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR));
02407   }
02408 
02409 
02410   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02411   void
02412   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02413   get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const
02414   {
02415     using Teuchos::ArrayRCP;
02416     using Teuchos::ArrayView;
02417     using Teuchos::RCP;
02418     using Teuchos::rcp;
02419 
02420     const char tfecfFuncName[] = "get1dCopy(A,LDA)";
02421     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < getLocalLength(), std::runtime_error,
02422       ": specified stride is not large enough for local vector length.");
02423     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02424       static_cast<size_t> (A.size ()) < LDA * (getNumVectors() - 1) + getLocalLength (),
02425       std::runtime_error, ": specified stride/storage is not large enough for "
02426       "the number of vectors.");
02427     RCP<Node> node = MVT::getNode(lclMV_);
02428     const size_t myStride = MVT::getStride(lclMV_),
02429                   numCols = getNumVectors(),
02430                   myLen   = getLocalLength();
02431     if (myLen > 0) {
02432       ArrayRCP<const Scalar> mydata = MVT::getValues(lclMV_);
02433       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,mydata);
02434       typename ArrayView<Scalar>::iterator Aptr = A.begin();
02435       for (size_t j=0; j<numCols; j++) {
02436         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02437         std::copy(myviewj,myviewj+myLen,Aptr);
02438         Aptr += LDA;
02439       }
02440       myview = Teuchos::null;
02441       mydata = Teuchos::null;
02442     }
02443   }
02444 
02445 
02446   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02447   void
02448   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02449   get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const
02450   {
02451     using Teuchos::ArrayRCP;
02452     using Teuchos::ArrayView;
02453     using Teuchos::RCP;
02454     using Teuchos::rcp;
02455 
02456     const char tfecfFuncName[] = "get2dCopy(ArrayOfPtrs)";
02457     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02458       static_cast<size_t> (ArrayOfPtrs.size ()) != getNumVectors(), std::runtime_error,
02459       ": Array of pointers must contain as many pointers as the MultiVector has rows.");
02460     RCP<Node> node = MVT::getNode(lclMV_);
02461     const size_t numCols = getNumVectors(),
02462                    myLen = getLocalLength();
02463     if (myLen > 0) {
02464       ArrayRCP<const Scalar> mybuff = MVT::getValues(lclMV_);
02465       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(mybuff.size(), mybuff);
02466       for (size_t j=0; j<numCols; ++j) {
02467 #ifdef HAVE_TPETRA_DEBUG
02468         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02469           static_cast<size_t> (ArrayOfPtrs[j].size ()) != getLocalLength (),
02470           std::runtime_error, ": The ArrayView provided in ArrayOfPtrs[" << j
02471           << "] was not large enough to contain the local entries.");
02472 #endif
02473         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
02474         std::copy(myviewj,myviewj+myLen,ArrayOfPtrs[j].begin());
02475       }
02476       myview = Teuchos::null;
02477       mybuff = Teuchos::null;
02478     }
02479   }
02480 
02481 
02482   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02483   Teuchos::ArrayRCP<const Scalar>
02484   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dView () const
02485   {
02486     if (getLocalLength () == 0 || getNumVectors () == 0) {
02487       return Teuchos::null;
02488     } else {
02489       TEUCHOS_TEST_FOR_EXCEPTION(
02490         ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
02491         "get1dView: This MultiVector does not have constant stride, so it is "
02492         "not possible to view its data as a single array.  You may check "
02493         "whether a MultiVector has constant stride by calling "
02494         "isConstantStride().");
02495       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02496       return node->template viewBuffer<Scalar> (getStride () * (getNumVectors () - 1) +
02497                                                 getLocalLength (),
02498                                                 MVT::getValues (lclMV_));
02499     }
02500   }
02501 
02502 
02503   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02504   Teuchos::ArrayRCP<Scalar>
02505   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dViewNonConst ()
02506   {
02507     if (getLocalLength () == 0 || getNumVectors () == 0) {
02508       return Teuchos::null;
02509     } else {
02510       TEUCHOS_TEST_FOR_EXCEPTION(
02511         ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
02512         "get1dViewNonConst: This MultiVector does not have constant stride, so "
02513         "it is not possible to view its data as a single array.  You may check "
02514         "whether a MultiVector has constant stride by calling "
02515         "isConstantStride().");
02516       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
02517       return node->template viewBufferNonConst<Scalar> (KokkosClassic::ReadWrite,
02518                                                         getStride () * (getNumVectors () - 1) +
02519                                                         getLocalLength (),
02520                                                         MVT::getValuesNonConst (lclMV_));
02521     }
02522   }
02523 
02524 
02525   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02526   Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
02527   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dViewNonConst()
02528   {
02529     using Teuchos::arcp;
02530     using Teuchos::ArrayRCP;
02531     using Teuchos::RCP;
02532 
02533     RCP<Node> node = MVT::getNode(lclMV_);
02534     ArrayRCP<ArrayRCP<Scalar> > views = arcp<ArrayRCP<Scalar> > (getNumVectors ());
02535     if (isConstantStride ()) {
02536       const size_t myStride = MVT::getStride(lclMV_),
02537                     numCols = getNumVectors(),
02538                     myLen   = getLocalLength();
02539       if (myLen > 0) {
02540         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02541         for (size_t j=0; j<numCols; ++j) {
02542           views[j] = myview.persistingView(0,myLen);
02543           myview += myStride;
02544         }
02545       }
02546     }
02547     else {
02548       const size_t myStride = MVT::getStride(lclMV_),
02549                     numCols = MVT::getNumCols(lclMV_),
02550                      myCols = getNumVectors(),
02551                      myLen  = MVT::getNumRows(lclMV_);
02552       if (myLen > 0) {
02553         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
02554         for (size_t j=0; j<myCols; ++j) {
02555           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02556         }
02557       }
02558     }
02559     return views;
02560   }
02561 
02562 
02563   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02564   Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
02565   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dView() const
02566   {
02567     using Teuchos::arcp;
02568     using Teuchos::ArrayRCP;
02569     using Teuchos::RCP;
02570 
02571     RCP<Node> node = MVT::getNode(lclMV_);
02572     ArrayRCP< ArrayRCP<const Scalar> > views = arcp<ArrayRCP<const Scalar> >(getNumVectors());
02573     if (isConstantStride()) {
02574       const size_t myStride = MVT::getStride(lclMV_),
02575                     numCols = getNumVectors(),
02576                     myLen   = getLocalLength();
02577       if (myLen > 0) {
02578         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02579         for (size_t j=0; j<numCols; ++j) {
02580           views[j] = myview.persistingView(0,myLen);
02581           myview += myStride;
02582         }
02583       }
02584     }
02585     else {
02586       const size_t myStride = MVT::getStride(lclMV_),
02587                     numCols = MVT::getNumCols(lclMV_),
02588                      myCols = getNumVectors(),
02589                      myLen  = MVT::getNumRows(lclMV_);
02590       if (myLen > 0) {
02591         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
02592         for (size_t j=0; j<myCols; ++j) {
02593           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
02594         }
02595       }
02596     }
02597     return views;
02598   }
02599 
02600 
02601   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02602   void
02603   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02604   multiply (Teuchos::ETransp transA,
02605             Teuchos::ETransp transB,
02606             const Scalar &alpha,
02607             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02608             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02609             const Scalar &beta)
02610   {
02611     using Teuchos::NO_TRANS;      // enums
02612     using Teuchos::TRANS;
02613     using Teuchos::CONJ_TRANS;
02614     using Teuchos::null;
02615     using Teuchos::ScalarTraits;  // traits
02616     using Teuchos::RCP;
02617     using Teuchos::rcp;
02618     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
02619 
02620     // This routine performs a variety of matrix-matrix multiply operations, interpreting
02621     // the MultiVector (this-aka C , A and B) as 2D matrices.  Variations are due to
02622     // the fact that A, B and C can be local replicated or global distributed
02623     // MultiVectors and that we may or may not operate with the transpose of
02624     // A and B.  Possible cases are:
02625     //                                       Num
02626     //      OPERATIONS                        cases  Notes
02627     //  1) C(local) = A^X(local) * B^X(local)  4    (X=Trans or Not, No comm needed)
02628     //  2) C(local) = A^T(distr) * B  (distr)  1    (2D dot product, replicate C)
02629     //  3) C(distr) = A  (distr) * B^X(local)  2    (2D vector update, no comm needed)
02630     //
02631     // The following operations are not meaningful for 1D distributions:
02632     //
02633     // u1) C(local) = A^T(distr) * B^T(distr)  1
02634     // u2) C(local) = A  (distr) * B^X(distr)  2
02635     // u3) C(distr) = A^X(local) * B^X(local)  4
02636     // u4) C(distr) = A^X(local) * B^X(distr)  4
02637     // u5) C(distr) = A^T(distr) * B^X(local)  2
02638     // u6) C(local) = A^X(distr) * B^X(local)  4
02639     // u7) C(distr) = A^X(distr) * B^X(local)  4
02640     // u8) C(local) = A^X(local) * B^X(distr)  4
02641     //
02642     // Total of 32 case (2^5).
02643 
02644     const char errPrefix[] = "Tpetra::MultiVector::multiply(transOpA,transOpB,alpha,A,B,beta): ";
02645 
02646     TEUCHOS_TEST_FOR_EXCEPTION( ScalarTraits<Scalar>::isComplex && (transA == TRANS || transB == TRANS), std::invalid_argument,
02647         errPrefix << "non-conjugate transpose not supported for complex types.");
02648     transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02649     transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS);
02650 
02651     // Compute effective dimensions, w.r.t. transpose operations on
02652     size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength();
02653     size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors();
02654     size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength();
02655     size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors();
02656 
02657     Scalar beta_local = beta; // local copy of beta; might be reassigned below
02658 
02659     TEUCHOS_TEST_FOR_EXCEPTION(
02660       getLocalLength() != A_nrows || getNumVectors() != B_ncols || A_ncols != B_nrows,
02661       std::runtime_error,
02662       errPrefix << "dimension of *this, op(A) and op(B) must be consistent.  "
02663       << std::endl << "The local part of *this is "
02664       << getLocalLength() << " x " << getNumVectors()
02665       << ", A is " << A_nrows << " x " << A_ncols
02666       << ", and B is " << B_nrows << " x " << B_ncols << ".");
02667 
02668     bool A_is_local = !A.isDistributed();
02669     bool B_is_local = !B.isDistributed();
02670     bool C_is_local = !this->isDistributed();
02671     bool Case1 = ( C_is_local &&  A_is_local &&  B_is_local);                                           // Case 1: C(local) = A^X(local) * B^X(local)
02672     bool Case2 = ( C_is_local && !A_is_local && !B_is_local && transA==CONJ_TRANS && transB==NO_TRANS); // Case 2: C(local) = A^T(distr) * B  (distr)
02673     bool Case3 = (!C_is_local && !A_is_local &&  B_is_local && transA==NO_TRANS  );                     // Case 3: C(distr) = A  (distr) * B^X(local)
02674 
02675     // Test that we are considering a meaningful cases
02676     TEUCHOS_TEST_FOR_EXCEPTION( !Case1 && !Case2 && !Case3, std::runtime_error,
02677         errPrefix << "multiplication of op(A) and op(B) into *this is not a supported use case.");
02678 
02679     if (beta != ScalarTraits<Scalar>::zero() && Case2)
02680     {
02681       // if Case2, then C is local and contributions must be summed across all nodes
02682       // however, if beta != 0, then accumulate beta*C into the sum
02683       // when summing across all nodes, we only want to accumulate this once, so
02684       // set beta == 0 on all nodes except node 0
02685       int MyPID = this->getMap()->getComm()->getRank();
02686       if (MyPID!=0) beta_local = ScalarTraits<Scalar>::zero();
02687     }
02688 
02689     // Check if A, B, C have constant stride, if not then make temp copy (strided)
02690     RCP<const MV> Atmp, Btmp;
02691     RCP<MV>       Ctmp;
02692     if (isConstantStride() == false) Ctmp = rcp (new MV (*this));
02693     else Ctmp = rcp(this,false);
02694 
02695     if (A.isConstantStride() == false) Atmp = rcp (new MV (A));
02696     else Atmp = rcp(&A,false);
02697 
02698     if (B.isConstantStride() == false) Btmp = rcp (new MV (B));
02699     else Btmp = rcp(&B,false);
02700 
02701 #ifdef HAVE_TEUCHOS_DEBUG
02702     TEUCHOS_TEST_FOR_EXCEPTION(!Ctmp->isConstantStride() || !Btmp->isConstantStride() || !Atmp->isConstantStride(), std::logic_error,
02703         errPrefix << "failed making temporary strided copies of input multivectors.");
02704 #endif
02705 
02706     KMV &C_mv = Ctmp->lclMV_;
02707     {
02708       // get local multivectors
02709       const KMV &A_mv = Atmp->lclMV_;
02710       const KMV &B_mv = Btmp->lclMV_;
02711       // do the multiply (GEMM)
02712       MVT::GEMM(C_mv,transA,transB,alpha,A_mv,B_mv,beta_local);
02713     }
02714 
02715     // Dispose of (possibly) extra copies of A, B
02716     Atmp = null;
02717     Btmp = null;
02718 
02719     RCP<Node> node = MVT::getNode(lclMV_);
02720     // If *this was not strided, copy the data from the strided version and then delete it
02721     if (! isConstantStride ()) {
02722       // *this is not strided, we must put data from Ctmp into *this
02723       TEUCHOS_TEST_FOR_EXCEPT(&C_mv != &lclMV_);
02724       const size_t numVecs = MVT::getNumCols(lclMV_);
02725       for (size_t j=0; j < numVecs; ++j) {
02726         node->template copyBuffers<Scalar>(getLocalLength(),MVT::getValues(C_mv,j),MVT::getValuesNonConst(lclMV_,whichVectors_[j]));
02727       }
02728     }
02729 
02730     // If Case 2 then sum up *this and distribute it to all processors.
02731     if (Case2) {
02732       this->reduce();
02733     }
02734   }
02735 
02736   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02737   void
02738   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02739   elementWiseMultiply (Scalar scalarAB,
02740                        const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
02741                        const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
02742                        Scalar scalarThis)
02743   {
02744     using Teuchos::arcp_const_cast;
02745     const char tfecfFuncName[] = "elementWiseMultiply()";
02746 
02747 #ifdef HAVE_TPETRA_DEBUG
02748     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02749       getLocalLength() != A.getLocalLength() ||
02750       getLocalLength() != B.getLocalLength(), std::runtime_error,
02751       ": MultiVectors do not have the same local length.");
02752 #endif // HAVE_TPETRA_DEBUG
02753     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
02754       B.getNumVectors() != this->getNumVectors(), std::runtime_error,
02755       ": MultiVectors 'this' and B must have the same number of vectors.");
02756     try {
02757       MVT::ElemMult (lclMV_, scalarThis, scalarAB, (const KMV&) A.lclMV_,
02758                      (const KMV&) B.lclMV_);
02759     }
02760     catch (std::runtime_error &e) {
02761       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
02762           ": caught exception from Kokkos:" << std::endl
02763           << e.what() << std::endl);
02764     }
02765   }
02766 
02767   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02768   void
02769   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::reduce()
02770   {
02771     using Teuchos::Array;
02772     using Teuchos::ArrayView;
02773     using Teuchos::ArrayRCP;
02774     using Teuchos::RCP;
02775     using Teuchos::reduceAll;
02776     using Teuchos::REDUCE_SUM;
02777 
02778     // This method should be called only for locally replicated
02779     // MultiVectors (! isDistributed ()).
02780     TEUCHOS_TEST_FOR_EXCEPTION(this->isDistributed (), std::runtime_error,
02781       "Tpetra::MultiVector::reduce() should only be called with locally "
02782       "replicated or otherwise not distributed MultiVector objects.");
02783     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02784     if (comm->getSize() == 1) return;
02785     RCP<Node> node = MVT::getNode(lclMV_);
02786     // sum the data across all multivectors
02787     // need to have separate packed buffers for send and receive
02788     // if we're packed, we'll just set the receive buffer as our data, the send as a copy
02789     // if we're not packed, we'll use allocated buffers for both.
02790     ArrayView<Scalar> target;
02791     const size_t myStride = MVT::getStride(lclMV_),
02792                  numCols  = MVT::getNumCols(lclMV_),
02793                  myLen    = MVT::getNumRows(lclMV_);
02794     Array<Scalar> sourceBuffer(numCols*myLen), tmparr(0);
02795     bool packed = isConstantStride() && (myStride == myLen);
02796     ArrayRCP<Scalar> bufView = node->template viewBufferNonConst<Scalar>(
02797                                           KokkosClassic::ReadWrite,myStride*(numCols-1)+myLen,
02798                                           MVT::getValuesNonConst(lclMV_) );
02799     if (packed) {
02800       // copy data from view to sourceBuffer, reduce into view below
02801       target = bufView(0,myLen*numCols);
02802       std::copy(target.begin(),target.end(),sourceBuffer.begin());
02803     }
02804     else {
02805       // copy data into sourceBuffer, reduce from sourceBuffer into tmparr below, copy back to view after that
02806       tmparr.resize(myLen*numCols);
02807       Scalar *sptr = sourceBuffer.getRawPtr();
02808       ArrayRCP<const Scalar> vptr = bufView;
02809       for (size_t j=0; j<numCols; ++j) {
02810         std::copy(vptr,vptr+myLen,sptr);
02811         sptr += myLen;
02812         vptr += myStride;
02813       }
02814       target = tmparr();
02815     }
02816     // reduce
02817     reduceAll<int,Scalar> (*comm, REDUCE_SUM, static_cast<int> (numCols*myLen),
02818                            sourceBuffer.getRawPtr (), target.getRawPtr ());
02819     if (! packed) {
02820       // copy tmparr back into view
02821       const Scalar *sptr = tmparr.getRawPtr();
02822       ArrayRCP<Scalar> vptr = bufView;
02823       for (size_t j=0; j<numCols; ++j) {
02824         std::copy (sptr, sptr+myLen, vptr);
02825         sptr += myLen;
02826         vptr += myStride;
02827       }
02828     }
02829     bufView = Teuchos::null;
02830   }
02831 
02832 
02833   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02834   void
02835   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02836   replaceLocalValue (LocalOrdinal MyRow,
02837                      size_t VectorIndex,
02838                      const Scalar &ScalarValue)
02839   {
02840 #ifdef HAVE_TPETRA_DEBUG
02841     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02842     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02843     TEUCHOS_TEST_FOR_EXCEPTION(
02844       MyRow < minLocalIndex || MyRow > maxLocalIndex,
02845       std::runtime_error,
02846       "Tpetra::MultiVector::replaceLocalValue: row index " << MyRow
02847       << " is invalid.  The range of valid row indices on this process "
02848       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
02849       << ", " << maxLocalIndex << "].");
02850     TEUCHOS_TEST_FOR_EXCEPTION(
02851       vectorIndexOutOfRange(VectorIndex),
02852       std::runtime_error,
02853       "Tpetra::MultiVector::replaceLocalValue: vector index " << VectorIndex
02854       << " of the multivector is invalid.");
02855 #endif
02856     const size_t colInd = isConstantStride () ?
02857       VectorIndex : whichVectors_[VectorIndex];
02858     lclMV_.replaceLocalValue (static_cast<size_t> (MyRow), colInd, ScalarValue);
02859   }
02860 
02861 
02862   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02863   void
02864   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02865   sumIntoLocalValue (LocalOrdinal MyRow,
02866                      size_t VectorIndex,
02867                      const Scalar &ScalarValue)
02868   {
02869 #ifdef HAVE_TPETRA_DEBUG
02870     const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
02871     const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
02872     TEUCHOS_TEST_FOR_EXCEPTION(
02873       MyRow < minLocalIndex || MyRow > maxLocalIndex,
02874       std::runtime_error,
02875       "Tpetra::MultiVector::sumIntoLocalValue: row index " << MyRow
02876       << " is invalid.  The range of valid row indices on this process "
02877       << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
02878       << ", " << maxLocalIndex << "].");
02879     TEUCHOS_TEST_FOR_EXCEPTION(
02880       vectorIndexOutOfRange(VectorIndex),
02881       std::runtime_error,
02882       "Tpetra::MultiVector::sumIntoLocalValue: vector index " << VectorIndex
02883       << " of the multivector is invalid.");
02884 #endif
02885     const size_t colInd = isConstantStride () ?
02886       VectorIndex : whichVectors_[VectorIndex];
02887     lclMV_.sumIntoLocalValue (static_cast<size_t> (MyRow), colInd, ScalarValue);
02888   }
02889 
02890 
02891   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02892   void
02893   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02894   replaceGlobalValue (GlobalOrdinal GlobalRow,
02895                       size_t VectorIndex,
02896                       const Scalar &ScalarValue)
02897   {
02898     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02899 #ifdef HAVE_TPETRA_DEBUG
02900     TEUCHOS_TEST_FOR_EXCEPTION(
02901       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
02902       std::runtime_error,
02903       "Tpetra::MultiVector::replaceGlobalValue: global row index " << GlobalRow
02904       << "is not present on this process " << this->getMap()->getComm()->getRank()
02905       << ".");
02906     TEUCHOS_TEST_FOR_EXCEPTION(
02907       vectorIndexOutOfRange(VectorIndex),
02908       std::runtime_error,
02909       "Tpetra::MultiVector::replaceGlobalValue: vector index " << VectorIndex
02910       << " of the multivector is invalid.");
02911 #endif
02912     replaceLocalValue (MyRow, VectorIndex, ScalarValue);
02913   }
02914 
02915   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02916   void
02917   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02918   sumIntoGlobalValue (GlobalOrdinal GlobalRow,
02919                       size_t VectorIndex,
02920                       const Scalar &ScalarValue)
02921   {
02922     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02923 #ifdef HAVE_TEUCHOS_DEBUG
02924     TEUCHOS_TEST_FOR_EXCEPTION(
02925       MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(),
02926       std::runtime_error,
02927       "Tpetra::MultiVector::sumIntoGlobalValue: global row index " << GlobalRow
02928       << "is not present on this process " << this->getMap()->getComm()->getRank()
02929       << ".");
02930     TEUCHOS_TEST_FOR_EXCEPTION(
02931       vectorIndexOutOfRange(VectorIndex),
02932       std::runtime_error,
02933       "Tpetra::MultiVector::sumIntoGlobalValue: vector index " << VectorIndex
02934       << " of the multivector is invalid.");
02935 #endif
02936     sumIntoLocalValue (MyRow, VectorIndex, ScalarValue);
02937   }
02938 
02939   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02940   template <class T>
02941   Teuchos::ArrayRCP<T>
02942   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02943   getSubArrayRCP (Teuchos::ArrayRCP<T> arr,
02944                   size_t j) const
02945   {
02946     const size_t stride = MVT::getStride (lclMV_);
02947     const size_t myLen = getLocalLength ();
02948     Teuchos::ArrayRCP<T> ret;
02949     if (isConstantStride()) {
02950       ret = arr.persistingView (j*stride, myLen);
02951     }
02952     else {
02953       ret = arr.persistingView (whichVectors_[j]*stride, myLen);
02954     }
02955     return ret;
02956   }
02957 
02958   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02959   KokkosClassic::MultiVector<Scalar,Node>
02960   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMV() const {
02961     return lclMV_;
02962   }
02963 
02964   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02965   TEUCHOS_DEPRECATED
02966   KokkosClassic::MultiVector<Scalar,Node>&
02967   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMVNonConst() {
02968     return lclMV_;
02969   }
02970 
02971   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02972   std::string
02973   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const
02974   {
02975     using std::endl;
02976     std::ostringstream oss;
02977     oss << Teuchos::typeName (*this) << " {" << endl
02978         << "  label: \"" << this->getObjectLabel () << "\"" << endl
02979         << "  numRows: " << getGlobalLength () << endl
02980         << "  numCols: " << getNumVectors () << endl
02981         << "  isConstantStride: " << isConstantStride () << endl;
02982     if (isConstantStride ()) {
02983       oss << "  columnStride: " << getStride () << endl;
02984     }
02985     oss << "}" << endl;
02986     return oss.str();
02987   }
02988 
02989   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02990   void
02991   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02992   describe (Teuchos::FancyOStream &out,
02993             const Teuchos::EVerbosityLevel verbLevel) const
02994   {
02995     using Teuchos::ArrayRCP;
02996     using Teuchos::RCP;
02997     using Teuchos::VERB_DEFAULT;
02998     using Teuchos::VERB_NONE;
02999     using Teuchos::VERB_LOW;
03000     using Teuchos::VERB_MEDIUM;
03001     using Teuchos::VERB_HIGH;
03002     using Teuchos::VERB_EXTREME;
03003     using std::endl;
03004     using std::setw;
03005 
03006     // Set default verbosity if applicable.
03007     const Teuchos::EVerbosityLevel vl =
03008       (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
03009 
03010     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
03011     const int myImageID = comm->getRank();
03012     const int numImages = comm->getSize();
03013 
03014     if (vl != VERB_NONE) {
03015       // Don't set the tab level unless we're printing something.
03016       Teuchos::OSTab tab (out);
03017 
03018       if (myImageID == 0) { // >= VERB_LOW prints description()
03019         out << this->description() << endl;
03020       }
03021       for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
03022         if (myImageID == imageCtr) {
03023           if (vl != VERB_LOW) {
03024             // At verbosity > VERB_LOW, each process prints something.
03025             out << "Process " << myImageID << ":" << endl;
03026 
03027             Teuchos::OSTab procTab (out);
03028             // >= VERB_MEDIUM: print the local vector length.
03029             out << "local length=" << getLocalLength();
03030             if (vl != VERB_MEDIUM) {
03031               // >= VERB_HIGH: print isConstantStride() and getStride()
03032               if (isConstantStride()) {
03033                 out << ", constant stride=" << getStride() << endl;
03034               }
03035               else {
03036                 out << ", not constant stride" << endl;
03037               }
03038               if (vl == VERB_EXTREME) {
03039                 // VERB_EXTREME: print all the values in the multivector.
03040                 out << "Values:" << endl;
03041                 ArrayRCP<ArrayRCP<const Scalar> > X = this->get2dView();
03042                 for (size_t i = 0; i < getLocalLength(); ++i) {
03043                   for (size_t j = 0; j < getNumVectors(); ++j) {
03044                     out << X[j][i];
03045                     if (j < getNumVectors() - 1) {
03046                       out << " ";
03047                     }
03048                   } // for each column
03049                   out << endl;
03050                 } // for each row
03051               } // if vl == VERB_EXTREME
03052             } // if (vl != VERB_MEDIUM)
03053             else { // vl == VERB_LOW
03054               out << endl;
03055             }
03056           } // if vl != VERB_LOW
03057         } // if it is my process' turn to print
03058         comm->barrier();
03059       } // for each process in the communicator
03060     } // if vl != VERB_NONE
03061   }
03062 
03063 #if TPETRA_USE_KOKKOS_DISTOBJECT
03064 
03065   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03066   void
03067   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03068   createViews() const
03069   {
03070     // Do nothing in Kokkos::View implementation
03071   }
03072 
03073   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03074   void
03075   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03076   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
03077   {
03078     // Do nothing in Kokkos::View implementation
03079   }
03080 
03081   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03082   void
03083   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
03084   {
03085     // Do nothing in Kokkos::View implementation
03086   }
03087 
03088 #else
03089 
03090   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03091   void
03092   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03093   createViews() const
03094   {
03095     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
03096     if (cview_.is_null () && getLocalLength () > 0) {
03097       Teuchos::ArrayRCP<const Scalar> buff = MVT::getValues (lclMV_);
03098       cview_ = node->template viewBuffer<Scalar> (buff.size (), buff);
03099     }
03100   }
03101 
03102   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03103   void
03104   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03105   createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
03106   {
03107     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
03108     if (ncview_.is_null () && getLocalLength () > 0) {
03109       Teuchos::ArrayRCP<Scalar> buff = MVT::getValuesNonConst (lclMV_);
03110       ncview_ = node->template viewBufferNonConst<Scalar> (rwo, buff.size (), buff);
03111     }
03112   }
03113 
03114   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03115   void
03116   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
03117   {
03118     const int constViewCount = cview_.total_count ();
03119     const int nonconstViewCount = ncview_.total_count ();
03120 
03121     // This prevents unused variable compiler warnings, in case Tpetra
03122     // efficiency warnings aren't enabled.
03123     (void) constViewCount;
03124     (void) nonconstViewCount;
03125 
03126     // Release the views.
03127     cview_ = Teuchos::null;
03128     ncview_ = Teuchos::null;
03129   }
03130 
03131 #endif
03132 
03133   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03134   void
03135   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
03136   removeEmptyProcessesInPlace (const Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >& newMap)
03137   {
03138     replaceMap (newMap);
03139   }
03140 
03141   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
03142   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>
03143   createCopy (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node >& src) {
03144     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
03145 
03146     if (src.getCopyOrView () == Teuchos::View) {
03147       // If src has view semantics, then neither the one-argument copy
03148       // constructor nor operator= will do a deep copy.  However, we
03149       // want the result to inherit the view semantics of src.  Thus, we
03150       // use the two-argument copy constructor to do a deep copy, then
03151       // set the result to have view semantics, and return it.
03152       MV dst (src, Teuchos::Copy); // This always creates a deep copy.
03153       dst.setCopyOrView (Teuchos::View);
03154       return dst;
03155     }
03156     else {
03157       return src; // copy ctor and op= both do a deep copy in this case.
03158     }
03159   }
03160 
03161   template <class DS, class DL, class DG, class DN, class SS, class SL, class SG, class SN>
03162   void
03163   deep_copy (MultiVector<DS,DL,DG,DN>& dst, const MultiVector<SS,SL,SG,SN>& src)
03164   {
03165     if (src.getCopyOrView () == Teuchos::Copy) {
03166       dst = src; // op= does a deep copy in this case.
03167     }
03168     else {
03169       // Remember the original semantics of dst.
03170       const Teuchos::DataAccess origMode = dst.getCopyOrView ();
03171 
03172       MultiVector<DS,DL,DG,DN> dst_temp (src, Teuchos::Copy); // deep copy
03173       dst_temp.setCopyOrView (Teuchos::View);
03174       dst = dst_temp; // shallow copy of dst_temp, thus a deep copy of src
03175 
03176       // Restore the original semantics of dst.
03177       dst.setCopyOrView (origMode);
03178     }
03179   }
03180 } // namespace Tpetra
03181 
03182 // Include KokkosRefactor partial specialisation if enabled
03183 #if defined(TPETRA_HAVE_KOKKOS_REFACTOR)
03184 #include "Tpetra_KokkosRefactor_MultiVector_def.hpp"
03185 #endif
03186 
03187 //
03188 // Explicit instantiation macro
03189 //
03190 // Must be expanded from within the Tpetra namespace!
03191 //
03192 
03193 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
03194   \
03195   template class MultiVector< SCALAR , LO , GO , NODE >; \
03196   template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
03197   template void deep_copy (MultiVector< SCALAR , LO , GO , NODE >& dst, const MultiVector< SCALAR , LO , GO , NODE >& src); \
03198 
03199 
03200 #endif // TPETRA_MULTIVECTOR_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines