Tpetra Matrix/Vector Services Version of the Day
Tpetra_MultiVector_def.hpp
00001 // @HEADER
00002 // ***********************************************************************
00003 //
00004 //          Tpetra: Templated Linear Algebra Services Package
00005 //                 Copyright (2008) Sandia Corporation
00006 //
00007 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00008 // the U.S. Government retains certain rights in this software.
00009 //
00010 // Redistribution and use in source and binary forms, with or without
00011 // modification, are permitted provided that the following conditions are
00012 // met:
00013 //
00014 // 1. Redistributions of source code must retain the above copyright
00015 // notice, this list of conditions and the following disclaimer.
00016 //
00017 // 2. Redistributions in binary form must reproduce the above copyright
00018 // notice, this list of conditions and the following disclaimer in the
00019 // documentation and/or other materials provided with the distribution.
00020 //
00021 // 3. Neither the name of the Corporation nor the names of the
00022 // contributors may be used to endorse or promote products derived from
00023 // this software without specific prior written permission.
00024 //
00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00036 //
00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
00038 //
00039 // ************************************************************************
00040 // @HEADER
00041 
00042 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
00043 #define TPETRA_MULTIVECTOR_DEF_HPP
00044 
00045 #include <Kokkos_NodeTrace.hpp>
00046 
00047 #include <Teuchos_Assert.hpp>
00048 #include <Teuchos_as.hpp>
00049 
00050 #include "Tpetra_Vector.hpp"
00051 
00052 #ifdef DOXYGEN_USE_ONLY
00053   #include "Tpetra_MultiVector_decl.hpp"
00054 #endif
00055 
00056 namespace Tpetra {
00057 
00058   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00059   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00060   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00061                size_t NumVectors,
00062                bool zeroOut) : /* default is true */
00063     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00064     lclMV_ (map->getNode ()),
00065     releaseViewsRaisedEfficiencyWarning_ (false),
00066     createViewsRaisedEfficiencyWarning_ (false),
00067     createViewsNonConstRaisedEfficiencyWarning_ (false)
00068   {
00069     using Teuchos::ArrayRCP;
00070     using Teuchos::RCP;
00071 
00072     TEUCHOS_TEST_FOR_EXCEPTION(NumVectors < 1, std::invalid_argument,
00073       "Tpetra::MultiVector::MultiVector(): NumVectors must be strictly positive.");
00074     const size_t myLen = getLocalLength();
00075     if (myLen > 0) {
00076       RCP<Node> node = map->getNode();
00077       // On host-type Kokkos Nodes, allocBuffer() just calls the
00078       // one-argument version of arcp to allocate memory.  This should
00079       // not fill the memory by default, otherwise we would lose the
00080       // first-touch allocation optimization.
00081       ArrayRCP<Scalar> data = node->template allocBuffer<Scalar>(myLen*NumVectors);
00082       MVT::initializeValues(lclMV_,myLen,NumVectors,data,myLen);
00083       if (zeroOut) {
00084         // MVT uses the Kokkos Node's parallel_for in this case, for
00085         // first-touch allocation (across rows).
00086         MVT::Init(lclMV_, Teuchos::ScalarTraits<Scalar>::zero());
00087       }
00088     }
00089     else {
00090       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00091     }
00092   }
00093 
00094   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00095   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00096   MultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& source) :
00097     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (source),
00098     lclMV_ (MVT::getNode (source.lclMV_)),
00099     releaseViewsRaisedEfficiencyWarning_ (false),
00100     createViewsRaisedEfficiencyWarning_ (false),
00101     createViewsNonConstRaisedEfficiencyWarning_ (false)
00102   {
00103     using Teuchos::ArrayRCP;
00104     using Teuchos::RCP;
00105 
00106     // copy data from the source MultiVector into this multivector
00107     RCP<Node> node = MVT::getNode(source.lclMV_);
00108     const LocalOrdinal myLen = source.getLocalLength();
00109     const size_t numVecs = source.getNumVectors();
00110 
00111 #if 0
00112     if (source.isConstantStride ()) {
00113       // On host-type Kokkos Nodes, allocBuffer() just calls the
00114       // one-argument version of arcp to allocate memory.  This should
00115       // not fill the memory by default, otherwise we would lose the
00116       // first-touch allocation optimization.
00117       ArrayRCP<Scalar> data = (myLen > 0) ?
00118   node->template allocBuffer<Scalar> (myLen * numVecs) :
00119   Teuchos::null;
00120       const size_t stride = (myLen > 0) ? myLen : size_t (0);
00121 
00122       // This just sets the dimensions, pointer, and stride of lclMV_.
00123       MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00124       // This actually copies the data.  It uses the Node's
00125       // parallel_for to copy, which should ensure first-touch
00126       // allocation on systems that support it.  Assign() only works
00127       // for copying the whole Kokkos::MultiVector, not when only
00128       // selecting certain columns (via whichVectors_).
00129       MVT::Assign (lclMV_, source.lclMV_);
00130     }
00131     else {
00132       if (myLen > 0) {
00133   // allocate data
00134   ArrayRCP<Scalar> data = node->template allocBuffer<Scalar>(myLen*numVecs);
00135   MVT::initializeValues(lclMV_,myLen,numVecs,data,myLen);
00136   // copy data
00137   {
00138     ArrayRCP<Scalar> dstdata = data;
00139     ArrayRCP<const Scalar> srcdata = MVT::getValues(source.lclMV_);
00140     for (size_t j = 0; j < numVecs; ++j) {
00141       ArrayRCP<const Scalar> srcj = source.getSubArrayRCP(srcdata,j);
00142       KOKKOS_NODE_TRACE("MultiVector::MultiVector(MV)")
00143         node->template copyBuffers<Scalar>(myLen,srcj,dstdata);
00144       dstdata += myLen;
00145     }
00146   }
00147       }
00148       else {
00149   MVT::initializeValues(lclMV_,0,numVecs,Teuchos::null,0);
00150       }
00151     }
00152 #else
00153     // On host-type Kokkos Nodes, allocBuffer() just calls the
00154     // one-argument version of arcp to allocate memory.  This should
00155     // not fill the memory by default, otherwise we would lose the
00156     // first-touch allocation optimization.
00157     ArrayRCP<Scalar> data = (myLen > 0) ?
00158       node->template allocBuffer<Scalar> (myLen * numVecs) :
00159       Teuchos::null;
00160     const size_t stride = (myLen > 0) ? myLen : size_t (0);
00161 
00162     // This just sets the dimensions, pointer, and stride of lclMV_.
00163     MVT::initializeValues (lclMV_, myLen, numVecs, data, stride);
00164     // This actually copies the data.  It uses the Node's
00165     // parallel_for to copy, which should ensure first-touch
00166     // allocation on systems that support it.
00167     if (source.isConstantStride ()) {
00168       MVT::Assign (lclMV_, source.lclMV_);
00169     }
00170     else {
00171       MVT::Assign (lclMV_, source.lclMV_, source.whichVectors_);
00172     }    
00173 #endif // 0
00174   }
00175 
00176   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00177   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00178   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00179                const Teuchos::ArrayRCP<Scalar>& view,
00180                size_t LDA,
00181                size_t NumVectors,
00182                EPrivateHostViewConstructor /* dummy */) :
00183     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00184     lclMV_ (map->getNode ()),
00185     releaseViewsRaisedEfficiencyWarning_ (false),
00186     createViewsRaisedEfficiencyWarning_ (false),
00187     createViewsNonConstRaisedEfficiencyWarning_ (false)
00188   {
00189     using Teuchos::as;
00190     using Teuchos::ArrayRCP;
00191     using Teuchos::RCP;
00192 
00193     const std::string tfecfFuncName("MultiVector(view,LDA,NumVector)");
00194     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00195       ": NumVectors must be strictly positive, but you specified NumVectors = "
00196       << NumVectors << ".");
00197     const size_t myLen = getLocalLength();
00198     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::invalid_argument,
00199       ": LDA must be large enough to accomodate the local entries.");
00200     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00201       as<size_t> (view.size ()) < LDA*(NumVectors - 1) + myLen,
00202       std::invalid_argument,
00203       ": view does not contain enough data to specify the entries in this.");
00204     MVT::initializeValues (lclMV_, myLen, NumVectors, view, myLen);
00205   }
00206 
00207   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00208   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00209   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00210                Teuchos::ArrayRCP<Scalar> data,
00211                size_t LDA,
00212                size_t NumVectors,
00213                EPrivateComputeViewConstructor /* dummy */) :
00214     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00215     lclMV_ (map->getNode ()),
00216     releaseViewsRaisedEfficiencyWarning_ (false),
00217     createViewsRaisedEfficiencyWarning_ (false),
00218     createViewsNonConstRaisedEfficiencyWarning_ (false)
00219   {
00220     using Teuchos::as;
00221     using Teuchos::ArrayRCP;
00222     using Teuchos::RCP;
00223 
00224     const std::string tfecfFuncName("MultiVector(data,LDA,NumVector)");
00225     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00226       ": NumVectors must be strictly positive.");
00227     const size_t myLen = getLocalLength();
00228 #ifdef HAVE_TPETRA_DEBUG
00229     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00230       as<size_t> (data.size ()) < LDA*(NumVectors - 1) + myLen,
00231       std::runtime_error,
00232       ": data does not contain enough data to specify the entries in this.");
00233 #endif
00234     MVT::initializeValues(lclMV_,myLen,NumVectors,data,LDA);
00235   }
00236 
00237   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00238   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00239   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map,
00240                Teuchos::ArrayRCP<Scalar> data,
00241                size_t LDA,
00242                Teuchos::ArrayView<const size_t> WhichVectors,
00243                EPrivateComputeViewConstructor /* dummy */) :
00244     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00245     lclMV_ (map->getNode ()),
00246     whichVectors_ (WhichVectors),
00247     releaseViewsRaisedEfficiencyWarning_ (false),
00248     createViewsRaisedEfficiencyWarning_ (false),
00249     createViewsNonConstRaisedEfficiencyWarning_ (false)
00250   {
00251     using Teuchos::as;
00252     using Teuchos::ArrayRCP;
00253     using Teuchos::RCP;
00254 
00255     const std::string tfecfFuncName("MultiVector(data,LDA,WhichVectors)");
00256     const size_t myLen = getLocalLength();
00257     size_t maxVector = *std::max_element(WhichVectors.begin(), WhichVectors.end());
00258     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00259       ": LDA must be large enough to accomodate the local entries.");
00260 #ifdef HAVE_TPETRA_DEBUG
00261     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00262       as<size_t> (data.size ()) < LDA * maxVector + myLen, std::runtime_error,
00263       ": data does not contain enough data to specify the entries in this.");
00264 #endif
00265     if (WhichVectors.size () == 1) {
00266       // shift data so that desired vector is vector 0
00267       maxVector = 0;
00268       data += LDA * WhichVectors[0];
00269       // kill whichVectors_; we are constant stride
00270       whichVectors_.clear ();
00271     }
00272     if (myLen > 0) {
00273       MVT::initializeValues(lclMV_,myLen,maxVector+1,data,LDA);
00274     }
00275     else {
00276       MVT::initializeValues(lclMV_,0,WhichVectors.size(),Teuchos::null,0);
00277     }
00278   }
00279 
00280 
00281   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00282   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00283   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00284                const Teuchos::ArrayView<const Scalar>& A,
00285                size_t LDA,
00286                size_t NumVectors) :
00287     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00288     lclMV_ (map->getNode ()),
00289     releaseViewsRaisedEfficiencyWarning_ (false),
00290     createViewsRaisedEfficiencyWarning_ (false),
00291     createViewsNonConstRaisedEfficiencyWarning_ (false)
00292   {
00293     using Teuchos::as;
00294     using Teuchos::ArrayRCP;
00295     using Teuchos::ArrayView;
00296     using Teuchos::RCP;
00297 
00298     const std::string tfecfFuncName("MultiVector(A,LDA)");
00299     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(NumVectors < 1, std::invalid_argument,
00300       ": NumVectors must be strictly positive.");
00301     const size_t myLen = getLocalLength();
00302     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < myLen, std::runtime_error,
00303       ": LDA must be large enough to accomodate the local entries.");
00304 #ifdef HAVE_TPETRA_DEBUG
00305     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00306       as<size_t> (A.size ()) < LDA*(NumVectors - 1) + myLen,
00307       std::runtime_error,
00308       ": A does not contain enough data to specify the entries in this.");
00309 #endif
00310     if (myLen > 0) {
00311       RCP<Node> node = MVT::getNode(lclMV_);
00312       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00313       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00314       // FIXME (mfh 13 Sep 2012) It would be better to use the Kokkos
00315       // Node's copyToBuffer method to push data directly to the
00316       // device pointer, rather than using an intermediate host
00317       // buffer.  Also, we should have an optimization for the
00318       // contiguous storage case (constant stride, and the stride
00319       // equals the local number of rows).
00320       KOKKOS_NODE_TRACE("MultiVector::MultiVector(1D)")
00321       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(Kokkos::WriteOnly,myLen*NumVectors,mydata);
00322       typename ArrayView<const Scalar>::iterator srcit = A.begin();
00323       for (size_t j = 0; j < NumVectors; ++j) {
00324         std::copy(srcit,srcit+myLen,myview);
00325         srcit += LDA;
00326         myview += myLen;
00327       }
00328       mydata = Teuchos::null;
00329       myview = Teuchos::null;
00330     }
00331     else {
00332       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00333     }
00334   }
00335 
00336   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00337   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00338   MultiVector (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& map,
00339                const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs,
00340                size_t NumVectors) :
00341     DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> (map),
00342     lclMV_ (map->getNode ()),
00343     releaseViewsRaisedEfficiencyWarning_ (false),
00344     createViewsRaisedEfficiencyWarning_ (false),
00345     createViewsNonConstRaisedEfficiencyWarning_ (false)
00346   {
00347     using Teuchos::as;
00348     using Teuchos::ArrayRCP;
00349     using Teuchos::ArrayView;
00350     using Teuchos::RCP;
00351 
00352     const std::string tfecfFuncName("MultiVector(ArrayOfPtrs)");
00353     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00354       NumVectors < 1 || NumVectors != as<size_t>(ArrayOfPtrs.size()),
00355       std::runtime_error,
00356       ": ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs.");
00357     const size_t myLen = getLocalLength();
00358     if (myLen > 0) {
00359       RCP<Node> node = MVT::getNode(lclMV_);
00360       ArrayRCP<Scalar> mydata = node->template allocBuffer<Scalar>(myLen*NumVectors);
00361       MVT::initializeValues(lclMV_,myLen,NumVectors,mydata,myLen);
00362       KOKKOS_NODE_TRACE("MultiVector::MultiVector(2D)")
00363       ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(Kokkos::WriteOnly,myLen*NumVectors,mydata);
00364       for (size_t j = 0; j < NumVectors; ++j) {
00365 #ifdef HAVE_TPETRA_DEBUG
00366         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
00367           as<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(),
00368           std::runtime_error,
00369           ": ArrayOfPtrs[" << j << "].size() (== " << ArrayOfPtrs[j].size()
00370           << ") is not equal to getLocalLength() (== " << getLocalLength());
00371 #endif
00372         typename ArrayView<const Scalar>::iterator src = ArrayOfPtrs[j].begin();
00373         std::copy(src,src+myLen,myview);
00374         myview += myLen;
00375       }
00376       myview = Teuchos::null;
00377       mydata = Teuchos::null;
00378     }
00379     else {
00380       MVT::initializeValues(lclMV_,0,NumVectors,Teuchos::null,0);
00381     }
00382   }
00383 
00384 
00385   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00386   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::~MultiVector() {
00387   }
00388 
00389 
00390   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00391   bool MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::isConstantStride() const {
00392     return whichVectors_.empty();
00393   }
00394 
00395 
00396   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00397   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalLength() const {
00398     return this->getMap()->getNodeNumElements();
00399   }
00400 
00401 
00402   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00403   global_size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getGlobalLength() const {
00404     return this->getMap()->getGlobalNumElements();
00405   }
00406 
00407 
00408   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00409   size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getStride() const {
00410     if (isConstantStride()) {
00411       return MVT::getStride(lclMV_);
00412     }
00413     return 0;
00414   }
00415 
00416 
00417   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00418   bool
00419   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00420   checkSizes(const DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> &sourceObj)
00421   {
00422     using Teuchos::RCP;
00423     using Teuchos::rcp_dynamic_cast;
00424     using Teuchos::rcpFromRef;
00425 
00426     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00427     // rcp_dynamic_cast gives us superior cast failure output to dynamic_cast.
00428     RCP<const MV> A = rcp_dynamic_cast<const MV> (rcpFromRef (sourceObj), true);
00429 
00430     // This method is called in DistObject::doTransfer().  By that
00431     // point, we've already constructed an Import or Export object
00432     // using the two multivectors' Maps, which means that (hopefully)
00433     // we've already checked other attributes of the multivectors.
00434     // Thus, all we need to do here is check the number of columns.
00435     return (A->getNumVectors() == this->getNumVectors());
00436   }
00437 
00438 
00439   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00440   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::copyAndPermute(
00441                           const DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> & sourceObj,
00442                           size_t numSameIDs,
00443                           const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
00444                           const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
00445   {
00446     using Teuchos::ArrayRCP;
00447     using Teuchos::ArrayView;
00448     using Teuchos::RCP;
00449 
00450     const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &sourceMV = dynamic_cast<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &>(sourceObj);
00451     typename ArrayView<const LocalOrdinal>::iterator pTo, pFrom;
00452     // any other error will be caught by Teuchos
00453     TEUCHOS_TEST_FOR_EXCEPTION(permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
00454       "Tpetra::MultiVector::copyAndPermute(): permuteToLIDs and permuteFromLIDs must have the same size.");
00455     RCP<Node> node = MVT::getNode(lclMV_);
00456     const size_t numCols = getNumVectors();
00457 
00458     // Copy the vectors one at a time.  This works whether or not the
00459     // multivectors have constant stride.
00460     for (size_t j = 0; j < numCols; ++j) {
00461       // Get host views of the current source and destination column.
00462       //
00463       // FIXME (mfh 10 Mar 2012) Copying should really be done on the
00464       // device.  There's no need to bring everything back to the
00465       // host, copy there, and then copy back.
00466       ArrayRCP<const Scalar> srcptr = sourceMV.getSubArrayRCP(sourceMV.cview_,j);
00467       ArrayRCP<      Scalar> dstptr =          getSubArrayRCP(ncview_,j);
00468 
00469       // The first numImportIDs IDs are the same between source and
00470       // target, so we can just copy the data.  (This favors faster
00471       // contiguous access whenever we can do it.)
00472       std::copy(srcptr,srcptr+numSameIDs,dstptr);
00473 
00474       // For the remaining GIDs, execute the permutations.  This may
00475       // involve noncontiguous access of both source and destination
00476       // vectors, depending on the LID lists.
00477       //
00478       // FIXME (mfh 09 Apr 2012) Permutation should be done on the
00479       // device, not on the host.
00480       //
00481       // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
00482       // the same process, this merges their values by replacement of
00483       // the last encountered GID, not by the specified merge rule
00484       // (such as ADD).
00485       for (pTo = permuteToLIDs.begin(), pFrom = permuteFromLIDs.begin();
00486            pTo != permuteToLIDs.end(); ++pTo, ++pFrom) {
00487         dstptr[*pTo] = srcptr[*pFrom];
00488       }
00489     }
00490   }
00491 
00492 
00493   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00494   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::packAndPrepare(
00495           const DistObject<Scalar,LocalOrdinal,GlobalOrdinal,Node> & sourceObj,
00496           const ArrayView<const LocalOrdinal> &exportLIDs,
00497           Array<Scalar> &exports,
00498           const ArrayView<size_t> &numExportPacketsPerLID,
00499           size_t& constantNumPackets,
00500           Distributor & /* distor */ )
00501   {
00502     using Teuchos::Array;
00503     using Teuchos::ArrayView;
00504     using Teuchos::as;
00505     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
00506 
00507     const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
00508 
00509     /* The layout in the export for MultiVectors is as follows:
00510        exports = { all of the data from row exportLIDs.front() ;
00511                    ....
00512                    all of the data from row exportLIDs.back() }
00513       This doesn't have the best locality, but is necessary because
00514       the data for a Packet (all data associated with an LID) is
00515       required to be contiguous. */
00516     TEUCHOS_TEST_FOR_EXCEPTION(as<int> (numExportPacketsPerLID.size ()) != exportLIDs.size (),
00517       std::runtime_error, "Tpetra::MultiVector::packAndPrepare(): size of num"
00518       "ExportPacketsPerLID buffer should be the same as exportLIDs.  numExport"
00519       "PacketsPerLID.size() = " << numExportPacketsPerLID.size() << ", but "
00520       "exportLIDs.size() = " << exportLIDs.size() << ".");
00521     const KMV& srcData = sourceMV.lclMV_;
00522     const size_t numCols = sourceMV.getNumVectors ();
00523     const size_t stride = MVT::getStride (srcData);
00524     constantNumPackets = numCols;
00525     exports.resize (numCols * exportLIDs.size ());
00526     typename ArrayView<const LocalOrdinal>::iterator idptr;
00527     typename Array<Scalar>::iterator expptr;
00528     expptr = exports.begin();
00529 
00530     if (sourceMV.isConstantStride ()) {
00531       size_t i = 0;
00532       for (idptr = exportLIDs.begin(); idptr != exportLIDs.end(); ++idptr, ++i) {
00533         for (size_t j = 0; j < numCols; ++j) {
00534           *expptr++ = sourceMV.cview_[j*stride + (*idptr)];
00535         }
00536         //we shouldn't need to set numExportPacketsPerLID[i] since we have set
00537         //constantNumPackets to a nonzero value. But we'll set it anyway, since
00538         //I'm not sure if the API will remain the way it is.
00539         numExportPacketsPerLID[i] = numCols;
00540       }
00541     }
00542     else {
00543       size_t i = 0;
00544       for (idptr = exportLIDs.begin(); idptr != exportLIDs.end(); ++idptr, ++i) {
00545         for (size_t j = 0; j < numCols; ++j) {
00546           *expptr++ = sourceMV.cview_[sourceMV.whichVectors_[j]*stride + (*idptr)];
00547         }
00548         numExportPacketsPerLID[i] = numCols;
00549       }
00550     }
00551   }
00552 
00553 
00554   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00555   void
00556   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00557   unpackAndCombine (const ArrayView<const LocalOrdinal> &importLIDs,
00558                     const ArrayView<const Scalar> &imports,
00559                     const ArrayView<size_t> &numPacketsPerLID,
00560                     size_t constantNumPackets,
00561                     Distributor & /* distor */,
00562                     CombineMode CM)
00563   {
00564     using Teuchos::ArrayView;
00565     using Teuchos::as;
00566     typedef Teuchos::ScalarTraits<Scalar> SCT;
00567 
00568     const std::string tfecfFuncName("unpackAndCombine()");
00569     /* The layout in the export for MultiVectors is as follows:
00570        imports = { all of the data from row exportLIDs.front() ;
00571                    ....
00572                    all of the data from row exportLIDs.back() }
00573       This doesn't have the best locality, but is necessary because
00574       the data for a Packet (all data associated with an LID) is
00575       required to be contiguous. */
00576 #ifdef HAVE_TPETRA_DEBUG
00577     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(imports.size()) != getNumVectors()*importLIDs.size(), std::runtime_error,
00578         ": Imports buffer size must be consistent with the amount of data to be exported.");
00579 #endif
00580     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(constantNumPackets) == as<size_t>(0), std::runtime_error,
00581         ": constantNumPackets input argument must be nonzero.");
00582 
00583     const size_t myStride = MVT::getStride(lclMV_);
00584     const size_t numVecs  = getNumVectors();
00585     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(numPacketsPerLID.size()) != as<size_t>(importLIDs.size()), std::runtime_error,
00586         ": numPacketsPerLID must have the same length as importLIDs.");
00587     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(numVecs) != as<size_t>(constantNumPackets), std::runtime_error,
00588         ": constantNumPackets must equal numVecs.");
00589 
00590     if (numVecs > 0 && importLIDs.size()) {
00591       typename ArrayView<const       Scalar>::iterator impptr;
00592       typename ArrayView<const LocalOrdinal>::iterator  idptr;
00593       impptr = imports.begin();
00594       // NOTE (mfh 10 Mar 2012) If you want to implement custom
00595       // combine modes, start editing here.  Also, if you trust
00596       // inlining, it would be nice to condense this code by using a
00597       // binary function object f:
00598       //
00599       // ncview_[...] = f (ncview_[...], *impptr++);
00600       if (CM == INSERT || CM == REPLACE) {
00601         if (isConstantStride()) {
00602           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00603             for (size_t j = 0; j < numVecs; ++j) {
00604               ncview_[myStride*j + *idptr] = *impptr++;
00605             }
00606           }
00607         }
00608         else {
00609           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00610             for (size_t j = 0; j < numVecs; ++j) {
00611               ncview_[myStride*whichVectors_[j] + *idptr] = *impptr++;
00612             }
00613           }
00614         }
00615       }
00616       else if (CM == ADD) {
00617         if (isConstantStride()) {
00618           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00619             for (size_t j = 0; j < numVecs; ++j) {
00620               ncview_[myStride*j + *idptr] += *impptr++;
00621             }
00622           }
00623         }
00624         else {
00625           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00626             for (size_t j = 0; j < numVecs; ++j) {
00627               ncview_[myStride*whichVectors_[j] + *idptr] += *impptr++;
00628             }
00629           }
00630         }
00631       }
00632       else if (CM == ABSMAX) {
00633         if (isConstantStride()) {
00634           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00635             for (size_t j = 0; j < numVecs; ++j) {
00636               Scalar &curval       = ncview_[myStride*j + *idptr];
00637               const Scalar &newval = *impptr++;
00638               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
00639             }
00640           }
00641         }
00642         else {
00643           for (idptr = importLIDs.begin(); idptr != importLIDs.end(); ++idptr) {
00644             for (size_t j = 0; j < numVecs; ++j) {
00645               Scalar &curval       = ncview_[myStride*whichVectors_[j] + *idptr];
00646               const Scalar &newval = *impptr++;
00647               curval = std::max( SCT::magnitude(curval), SCT::magnitude(newval) );
00648             }
00649           }
00650         }
00651       }
00652       else {
00653         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX, std::invalid_argument,
00654             ": Invalid CombineMode: " << CM);
00655       }
00656     }
00657   }
00658 
00659 
00660   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00661   inline size_t MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getNumVectors() const {
00662     if (isConstantStride()) {
00663       return MVT::getNumCols(lclMV_);
00664     }
00665     else {
00666       return whichVectors_.size();
00667     }
00668   }
00669 
00670 
00671   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00672   void
00673   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00674   dot (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
00675        const Teuchos::ArrayView<Scalar> &dots) const
00676   {
00677     using Teuchos::Array;
00678     using Teuchos::ArrayRCP;
00679     using Teuchos::as;
00680     using Teuchos::arcp_const_cast;
00681 
00682     const std::string tfecfFuncName("dot()");
00683     const size_t myLen   = getLocalLength();
00684     const size_t numVecs = getNumVectors();
00685 #ifdef HAVE_TPETRA_DEBUG
00686     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
00687         ": MultiVectors do not have compatible Maps:" << std::endl
00688         << "this->getMap(): " << std::endl << *this->getMap()
00689         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
00690 #else
00691     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
00692         ": MultiVectors do not have the same local length.");
00693 #endif
00694     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
00695         ": MultiVectors must have the same number of vectors.");
00696     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(dots.size()) != numVecs, std::runtime_error,
00697         ": dots.size() must be as large as the number of vectors in *this and A.");
00698     if (isConstantStride() && A.isConstantStride()) {
00699       MVT::Dot(lclMV_,A.lclMV_,dots);
00700     }
00701     else {
00702       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
00703       ArrayRCP<Scalar> vptr  = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
00704       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
00705       for (size_t j=0; j < numVecs; ++j) {
00706         ArrayRCP<Scalar> vj  =   getSubArrayRCP( vptr,j);
00707         ArrayRCP<Scalar> avj = A.getSubArrayRCP(avptr,j);
00708         MVT::initializeValues(a,myLen, 1, avj, myLen);
00709         MVT::initializeValues(v,myLen, 1,  vj, myLen);
00710         dots[j] = MVT::Dot((const KMV&)v,(const KMV &)a);
00711       }
00712     }
00713     if (this->isDistributed()) {
00714       Array<Scalar> ldots(dots);
00715       Teuchos::reduceAll(*this->getMap()->getComm(),Teuchos::REDUCE_SUM,as<int>(numVecs),ldots.getRawPtr(),dots.getRawPtr());
00716     }
00717   }
00718 
00719 
00720   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00721   void
00722   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00723   norm2 (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>& norms) const
00724   {
00725     using Teuchos::arcp_const_cast;
00726     using Teuchos::Array;
00727     using Teuchos::ArrayRCP;
00728     using Teuchos::ArrayView;
00729     using Teuchos::as;
00730     using Teuchos::reduceAll;
00731     using Teuchos::REDUCE_SUM;
00732     using Teuchos::ScalarTraits;
00733     typedef typename ScalarTraits<Scalar>::magnitudeType MT;
00734     typedef ScalarTraits<MT> STM;
00735 
00736     const size_t numVecs = this->getNumVectors();
00737     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(norms.size()) != numVecs,
00738       std::runtime_error,
00739       "Tpetra::MultiVector::norm2(norms): norms.size() must be as large as the number of vectors in *this.");
00740     if (isConstantStride ()) {
00741       MVT::Norm2Squared (lclMV_,norms);
00742     }
00743     else {
00744       KMV v (MVT::getNode (lclMV_));
00745       ArrayRCP<Scalar> vi;
00746       for (size_t i=0; i < numVecs; ++i) {
00747         vi = arcp_const_cast<Scalar> (MVT::getValues (lclMV_, whichVectors_[i]));
00748         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vi, MVT::getStride (lclMV_));
00749         norms[i] = MVT::Norm2Squared (v);
00750       }
00751     }
00752     if (this->isDistributed ()) {
00753       Array<MT> lnorms (norms);
00754       // FIXME (mfh 25 Apr 2012) Somebody explain to me why we're
00755       // calling Teuchos::reduceAll when MultiVector has a perfectly
00756       // good reduce() function.
00757       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs),
00758                  lnorms.getRawPtr (), norms.getRawPtr ());
00759     }
00760     for (typename ArrayView<MT>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
00761       (*n) = STM::squareroot (*n);
00762     }
00763   }
00764 
00765 
00766   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00767   void
00768   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00769   normWeighted (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& weights,
00770                 const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
00771   {
00772     using Teuchos::arcp_const_cast;
00773     using Teuchos::Array;
00774     using Teuchos::ArrayRCP;
00775     using Teuchos::ArrayView;
00776     using Teuchos::as;
00777     using Teuchos::reduceAll;
00778     using Teuchos::REDUCE_SUM;
00779     using Teuchos::ScalarTraits;
00780     typedef ScalarTraits<Scalar> SCT;
00781     typedef typename SCT::magnitudeType Mag;
00782 
00783     const std::string tfecfFuncName("normWeighted()");
00784 
00785     const Mag OneOverN = ScalarTraits<Mag>::one() / getGlobalLength();
00786     bool OneW = false;
00787     const size_t numVecs = this->getNumVectors();
00788     if (weights.getNumVectors() == 1) {
00789       OneW = true;
00790     }
00791     else {
00792       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(weights.getNumVectors() != numVecs, std::runtime_error,
00793         ": MultiVector of weights must contain either one vector or the same number of vectors as this.");
00794     }
00795 #ifdef HAVE_TPETRA_DEBUG
00796     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*weights.getMap()), std::runtime_error,
00797       ": MultiVectors do not have compatible Maps:" << std::endl
00798       << "this->getMap(): " << std::endl << *this->getMap()
00799       << "weights.getMap(): " << std::endl << *weights.getMap() << std::endl);
00800 #else
00801     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != weights.getLocalLength(), std::runtime_error,
00802       ": MultiVectors do not have the same local length.");
00803 #endif
00804     const size_t myLen = getLocalLength();
00805 
00806     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(norms.size()) != numVecs, std::runtime_error,
00807       ": norms.size() must be as large as the number of vectors in *this.");
00808     if (isConstantStride() && weights.isConstantStride()) {
00809       MVT::WeightedNorm(lclMV_,weights.lclMV_,norms);
00810     }
00811     else {
00812       KMV v(MVT::getNode(lclMV_)), w(MVT::getNode(lclMV_));
00813       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar> (MVT::getValues (lclMV_));
00814       ArrayRCP<Scalar> wptr = arcp_const_cast<Scalar> (MVT::getValues (weights.lclMV_));
00815       ArrayRCP<Scalar> wj   = wptr.persistingView (0,myLen);
00816       MVT::initializeValues (w,myLen, 1, wj, myLen);
00817       for (size_t j=0; j < numVecs; ++j) {
00818         ArrayRCP<Scalar> vj = getSubArrayRCP (vptr, j);
00819         MVT::initializeValues(v,myLen, 1,  vj, myLen);
00820         if (! OneW) {
00821           wj = weights.getSubArrayRCP(wptr,j);
00822           MVT::initializeValues (w, myLen, 1, wj, myLen);
00823         }
00824         norms[j] = MVT::WeightedNorm ((const KMV&)v, (const KMV &)w);
00825       }
00826     }
00827     if (this->isDistributed ()) {
00828       Array<Mag> lnorms(norms);
00829       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int>(numVecs),
00830                  lnorms.getRawPtr (), norms.getRawPtr ());
00831     }
00832     for (typename ArrayView<Mag>::iterator n = norms.begin(); n != norms.begin()+numVecs; ++n) {
00833       *n = ScalarTraits<Mag>::squareroot(*n * OneOverN);
00834     }
00835   }
00836 
00837 
00838   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00839   void
00840   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00841   norm1 (const ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
00842   {
00843     using Teuchos::Array;
00844     using Teuchos::ArrayRCP;
00845     using Teuchos::arcp_const_cast;
00846     using Teuchos::as;
00847     using Teuchos::reduceAll;
00848     using Teuchos::REDUCE_SUM;
00849     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
00850 
00851     const size_t numVecs = this->getNumVectors();
00852     TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(norms.size()) != numVecs, std::runtime_error,
00853         "Tpetra::MultiVector::norm1(norms): norms.size() must be as large as the number of vectors in *this.");
00854     if (isConstantStride()) {
00855       MVT::Norm1(lclMV_,norms);
00856     }
00857     else {
00858       KMV v(MVT::getNode(lclMV_));
00859       ArrayRCP<Scalar> vj;
00860       for (size_t j=0; j < numVecs; ++j) {
00861         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_,whichVectors_[j]) );
00862         MVT::initializeValues (v, MVT::getNumRows(lclMV_), 1, vj, MVT::getStride (lclMV_));
00863         norms[j] = MVT::Norm1 ((const KMV&)v);
00864       }
00865     }
00866     if (this->isDistributed ()) {
00867       Array<Mag> lnorms (norms);
00868       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs), lnorms.getRawPtr (), norms.getRawPtr ());
00869     }
00870   }
00871 
00872 
00873   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00874   void
00875   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00876   normInf (const Teuchos::ArrayView<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> &norms) const
00877   {
00878     using Teuchos::Array;
00879     using Teuchos::ArrayRCP;
00880     using Teuchos::arcp_const_cast;
00881     using Teuchos::as;
00882     using Teuchos::reduceAll;
00883     using Teuchos::REDUCE_MAX;
00884     typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType Mag;
00885 
00886     const size_t numVecs = this->getNumVectors();
00887     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(norms.size()) != numVecs, std::runtime_error,
00888       "Tpetra::MultiVector::normInf(norms): norms.size() must be as large as the number of vectors in *this.");
00889     if (isConstantStride ()) {
00890       MVT::NormInf(lclMV_,norms);
00891     }
00892     else {
00893       KMV v(MVT::getNode(lclMV_));
00894       ArrayRCP<Scalar> vj;
00895       for (size_t j=0; j < numVecs; ++j) {
00896         vj = Teuchos::arcp_const_cast<Scalar>( MVT::getValues(lclMV_,whichVectors_[j]) );
00897         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
00898         norms[j] = MVT::NormInf((const KMV&)v);
00899       }
00900     }
00901     if (this->isDistributed()) {
00902       Array<Mag> lnorms(norms);
00903       reduceAll (*this->getMap ()->getComm (), REDUCE_MAX, as<int> (numVecs), lnorms.getRawPtr (), norms.getRawPtr ());
00904     }
00905   }
00906 
00907 
00908   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00909   void
00910   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00911   meanValue (const Teuchos::ArrayView<Scalar> &means) const
00912   {
00913     using Teuchos::Array;
00914     using Teuchos::ArrayRCP;
00915     using Teuchos::arcp_const_cast;
00916     using Teuchos::as;
00917     using Teuchos::arcp_const_cast;
00918     using Teuchos::reduceAll;
00919     using Teuchos::REDUCE_SUM;
00920     typedef Teuchos::ScalarTraits<Scalar> SCT;
00921 
00922     const size_t numVecs = getNumVectors();
00923     const size_t myLen   = getLocalLength();
00924     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(means.size()) != numVecs, std::runtime_error,
00925       "Tpetra::MultiVector::meanValue(): means.size() must be as large as the number of vectors in *this.");
00926     // compute local components of the means
00927     // sum these across all nodes
00928     if (isConstantStride()) {
00929       MVT::Sum(lclMV_,means);
00930     }
00931     else {
00932       KMV v(MVT::getNode(lclMV_));
00933       ArrayRCP<Scalar> vptr = arcp_const_cast<Scalar>(MVT::getValues(lclMV_));
00934       for (size_t j=0; j < numVecs; ++j) {
00935         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j);
00936         MVT::initializeValues(v,myLen, 1,  vj, myLen);
00937         means[j] = MVT::Sum((const KMV &)v);
00938       }
00939     }
00940     if (this->isDistributed()) {
00941       Array<Scalar> lmeans(means);
00942       // only combine if we are a distributed MV
00943       reduceAll (*this->getMap ()->getComm (), REDUCE_SUM, as<int> (numVecs),
00944                  lmeans.getRawPtr (), means.getRawPtr ());
00945     }
00946     // mfh 12 Apr 2012: Don't take out the cast from the ordinal type
00947     // to the magnitude type, since operator/ (std::complex<T>, int)
00948     // isn't necessarily defined.
00949     const Scalar OneOverN =
00950       SCT::one() / as<typename SCT::magnitudeType> (getGlobalLength ());
00951     for (typename ArrayView<Scalar>::iterator i = means.begin();
00952          i != means.begin() + numVecs;
00953          ++i) {
00954       (*i) = (*i) * OneOverN;
00955     }
00956   }
00957 
00958 
00959   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00960   void
00961   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00962   randomize()
00963   {
00964     if (isConstantStride ()) {
00965       MVT::Random (lclMV_);
00966     }
00967     else {
00968       const size_t numVecs = this->getNumVectors ();
00969       KMV v (MVT::getNode (lclMV_));
00970       Teuchos::ArrayRCP<Scalar> vj;
00971       for (size_t j = 0; j < numVecs; ++j) {
00972         vj = MVT::getValuesNonConst (lclMV_, whichVectors_[j]);
00973         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
00974         MVT::Random (v);
00975       }
00976     }
00977   }
00978 
00979 
00980   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
00981   void
00982   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
00983   putScalar (const Scalar &alpha)
00984   {
00985     const size_t numVecs = getNumVectors();
00986     if (isConstantStride()) {
00987       MVT::Init(lclMV_,alpha);
00988     }
00989     else {
00990       KMV v(MVT::getNode(lclMV_));
00991       Teuchos::ArrayRCP<Scalar> vj;
00992       for (size_t j=0; j < numVecs; ++j) {
00993         vj = MVT::getValuesNonConst(lclMV_,whichVectors_[j]);
00994         MVT::initializeValues(v,MVT::getNumRows(lclMV_), 1, vj, MVT::getStride(lclMV_));
00995         MVT::Init(v,alpha);
00996       }
00997     }
00998   }
00999 
01000 
01001   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01002   void
01003   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01004   replaceMap (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> > &map)
01005   {
01006 #ifdef HAVE_TEUCHOS_DEBUG
01007     TEUCHOS_TEST_FOR_EXCEPTION(! this->getMap ()->isCompatible (*map),
01008       std::invalid_argument, "Tpetra::MultiVector::replaceMap(): The input map "
01009       "is not compatible with this multivector's current map.  The replaceMap() "
01010       "method is not for data redistribution; use Import or Export for that.");
01011 #endif // HAVE_TEUCHOS_DEBUG
01012     this->map_ = map;
01013   }
01014 
01015   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01016   void
01017   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01018   scale (const Scalar &alpha)
01019   {
01020     using Teuchos::arcp_const_cast;
01021     using Teuchos::ArrayRCP;
01022 
01023     // NOTE: can't substitute putScalar(0.0) for scale(0.0), because
01024     //       the former will overwrite NaNs present in the MultiVector, while the
01025     //       semantics of this call require multiplying them by 0, which IEEE requires to be NaN
01026     const size_t numVecs = getNumVectors();
01027     if (alpha == Teuchos::ScalarTraits<Scalar>::one()) {
01028       // do nothing
01029     }
01030     else if (isConstantStride()) {
01031       MVT::Scale(lclMV_,alpha);
01032     }
01033     else {
01034       KMV v(MVT::getNode(lclMV_));
01035       ArrayRCP<Scalar> vj;
01036       for (size_t j=0; j < numVecs; ++j) {
01037         vj = arcp_const_cast<Scalar> (MVT::getValues(lclMV_, whichVectors_[j]));
01038         MVT::initializeValues (v, MVT::getNumRows (lclMV_), 1, vj, MVT::getStride (lclMV_));
01039         MVT::Scale (v, alpha);
01040       }
01041     }
01042   }
01043 
01044 
01045   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01046   void
01047   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01048   scale (Teuchos::ArrayView<const Scalar> alphas)
01049   {
01050     using Teuchos::arcp_const_cast;
01051     using Teuchos::ArrayRCP;
01052     using Teuchos::as;
01053 
01054     const size_t numVecs = this->getNumVectors();
01055     TEUCHOS_TEST_FOR_EXCEPTION(as<size_t>(alphas.size()) != numVecs, std::runtime_error,
01056       "Tpetra::MultiVector::scale(alphas): alphas.size() must be as large as the number of vectors in *this.");
01057     KMV vec(MVT::getNode(lclMV_));
01058     const size_t myLen = MVT::getNumRows(lclMV_);
01059     if (myLen == 0) return;
01060     ArrayRCP<Scalar> mybuf = MVT::getValuesNonConst(lclMV_);
01061     for (size_t j = 0; j < numVecs; ++j) {
01062       if (alphas[j] == Teuchos::ScalarTraits<Scalar>::one()) {
01063         // do nothing: NaN * 1.0 == NaN, Number*1.0 == Number
01064       }
01065       else {
01066         ArrayRCP<Scalar> mybufj = getSubArrayRCP(mybuf,j);
01067         MVT::initializeValues(vec,myLen,1,mybufj,myLen);
01068         MVT::Scale(vec,alphas[j]);
01069       }
01070     }
01071   }
01072 
01073 
01074   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01075   void
01076   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01077   scale (const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01078   {
01079     using Teuchos::arcp_const_cast;
01080     using Teuchos::ArrayRCP;
01081     using Teuchos::as;
01082 
01083     const std::string tfecfFuncName("scale(alpha,A)");
01084 
01085     const size_t numVecs = getNumVectors(),
01086                  myLen   = getLocalLength();
01087 #ifdef HAVE_TPETRA_DEBUG
01088     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! this->getMap ()->isCompatible (*A.getMap ()),
01089       std::runtime_error, ": MultiVectors do not have compatible Maps:" << std::endl
01090         << "this->getMap(): " << std::endl << *(this->getMap ())
01091         << "A.getMap(): " << std::endl << *(A.getMap ()) << std::endl);
01092 #else
01093     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01094       ": MultiVectors do not have the same local length.");
01095 #endif
01096     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != numVecs, std::runtime_error,
01097       ": MultiVectors must have the same number of vectors.");
01098     if (isConstantStride() && A.isConstantStride()) {
01099       // set me == alpha*A
01100       MVT::Scale(lclMV_,alpha,(const KMV&)A.lclMV_);
01101     }
01102     else {
01103       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01104       ArrayRCP<Scalar> vptr  = MVT::getValuesNonConst (lclMV_);
01105       ArrayRCP<Scalar> avptr = arcp_const_cast<Scalar> (MVT::getValues (A.lclMV_));
01106       for (size_t j=0; j < numVecs; ++j) {
01107         ArrayRCP<Scalar>  vj =   getSubArrayRCP (vptr,  j);
01108         ArrayRCP<Scalar> avj = A.getSubArrayRCP (avptr, j);
01109         MVT::initializeValues (a,myLen, 1, avj, myLen);
01110         MVT::initializeValues (v,myLen, 1,  vj, myLen);
01111         MVT::Scale (v, alpha, (const KMV &)a);
01112       }
01113     }
01114   }
01115 
01116 
01117   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01118   void
01119   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01120   reciprocal (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A)
01121   {
01122     using Teuchos::arcp_const_cast;
01123     using Teuchos::ArrayRCP;
01124     using Teuchos::as;
01125 
01126     const std::string tfecfFuncName("reciprocal()");
01127 
01128 #ifdef HAVE_TPETRA_DEBUG
01129     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01130         ": MultiVectors do not have compatible Maps:" << std::endl
01131         << "this->getMap(): " << std::endl << *this->getMap()
01132         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01133 #else
01134     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01135         ": MultiVectors do not have the same local length.");
01136 #endif
01137     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01138         ": MultiVectors must have the same number of vectors.");
01139     const size_t numVecs = getNumVectors();
01140     const size_t myLen = getLocalLength();
01141     try {
01142       if (isConstantStride() && A.isConstantStride()) {
01143         MVT::Recip(lclMV_,(const KMV&)A.lclMV_);
01144       }
01145       else {
01146         KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01147         ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01148                         avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01149         for (size_t j=0; j < numVecs; ++j) {
01150           ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01151                           avj = A.getSubArrayRCP(avptr,j);
01152           MVT::initializeValues(a,myLen, 1, avj, myLen);
01153           MVT::initializeValues(v,myLen, 1,  vj, myLen);
01154           MVT::Recip(v,(const KMV &)a);
01155         }
01156       }
01157     }
01158     catch (std::runtime_error &e) {
01159       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
01160           ": caught exception from Kokkos:" << std::endl
01161           << e.what() << std::endl);
01162     }
01163   }
01164 
01165 
01166   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01167   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::abs(const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A) {
01168     using Teuchos::arcp_const_cast;
01169     using Teuchos::ArrayRCP;
01170     using Teuchos::as;
01171 
01172     const std::string tfecfFuncName("abs()");
01173 #ifdef HAVE_TPETRA_DEBUG
01174     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01175         ": MultiVectors do not have compatible Maps:" << std::endl
01176         << "this->getMap(): " << std::endl << *this->getMap()
01177         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01178 #else
01179     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01180         ": MultiVectors do not have the same local length.");
01181 #endif
01182     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01183         ": MultiVectors must have the same number of vectors.");
01184     const size_t myLen = getLocalLength();
01185     const size_t numVecs = getNumVectors();
01186     if (isConstantStride() && A.isConstantStride()) {
01187       MVT::Abs(lclMV_,(const KMV&)A.lclMV_);
01188     }
01189     else {
01190       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01191       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01192                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01193       for (size_t j=0; j < numVecs; ++j) {
01194         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01195                         avj = A.getSubArrayRCP(avptr,j);
01196         MVT::initializeValues(a,myLen, 1, avj, myLen);
01197         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01198         MVT::Abs(v,(const KMV &)a);
01199       }
01200     }
01201   }
01202 
01203 
01204   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01205   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01206                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01207                       const Scalar &beta)
01208   {
01209     using Teuchos::arcp_const_cast;
01210     using Teuchos::ArrayRCP;
01211     using Teuchos::as;
01212 
01213     const std::string tfecfFuncName("update()");
01214     // this = beta*this + alpha*A
01215     // must support case where &this == &A
01216     // can't short circuit on alpha==0.0 or beta==0.0, because 0.0*NaN != 0.0
01217 #ifdef HAVE_TPETRA_DEBUG
01218     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()), std::runtime_error,
01219         ": MultiVectors do not have compatible Maps:" << std::endl
01220         << "this->getMap(): " << std::endl << this->getMap()
01221         << "A.getMap(): " << std::endl << *A.getMap() << std::endl);
01222 #else
01223     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength(), std::runtime_error,
01224         ": MultiVectors do not have the same local length.");
01225 #endif
01226     const size_t myLen = getLocalLength();
01227     const size_t numVecs = getNumVectors();
01228     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors(), std::runtime_error,
01229         ": MultiVectors must have the same number of vectors.");
01230     if (isConstantStride() && A.isConstantStride()) {
01231       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta);
01232     }
01233     else {
01234       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_));
01235       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01236                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_));
01237       for (size_t j=0; j < numVecs; ++j) {
01238         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01239                         avj = A.getSubArrayRCP(avptr,j);
01240         MVT::initializeValues(a,myLen, 1, avj, myLen);
01241         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01242         MVT::GESUM(v,alpha,(const KMV &)a,beta);
01243       }
01244     }
01245   }
01246 
01247 
01248   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01249   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::update(
01250                       const Scalar &alpha, const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &A,
01251                       const Scalar &beta,  const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &B,
01252                       const Scalar &gamma)
01253   {
01254     using Teuchos::arcp_const_cast;
01255     using Teuchos::ArrayRCP;
01256     using Teuchos::as;
01257 
01258     const std::string tfecfFuncName("update()");
01259     // this = alpha*A + beta*B + gamma*this
01260     // must support case where &this == &A or &this == &B
01261     // can't short circuit on alpha==0.0 or beta==0.0 or gamma==0.0, because 0.0*NaN != 0.0
01262 #ifdef HAVE_TPETRA_DEBUG
01263     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()) || !this->getMap()->isCompatible(*B.getMap()),
01264         std::runtime_error,
01265         ": MultiVectors do not have compatible Maps:" << std::endl
01266         << "this->getMap(): " << std::endl << *this->getMap()
01267         << "A.getMap(): " << std::endl << *A.getMap() << std::endl
01268         << "B.getMap(): " << std::endl << *B.getMap() << std::endl);
01269 #else
01270     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength() || getLocalLength() != B.getLocalLength(), std::runtime_error,
01271         ": MultiVectors do not have the same local length.");
01272 #endif
01273     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != this->getNumVectors() || B.getNumVectors() != this->getNumVectors(), std::runtime_error,
01274         ": MultiVectors must have the same number of vectors.");
01275     const size_t myLen = getLocalLength();
01276     const size_t numVecs = getNumVectors();
01277     if (isConstantStride() && A.isConstantStride() && B.isConstantStride()) {
01278       MVT::GESUM(lclMV_,alpha,(const KMV&)A.lclMV_,beta,(const KMV&)B.lclMV_,gamma);
01279     }
01280     else {
01281       KMV v(MVT::getNode(lclMV_)), a(MVT::getNode(lclMV_)), b(MVT::getNode(lclMV_));
01282       ArrayRCP<Scalar> vptr = MVT::getValuesNonConst(lclMV_),
01283                       avptr = arcp_const_cast<Scalar>(MVT::getValues(A.lclMV_)),
01284                       bvptr = arcp_const_cast<Scalar>(MVT::getValues(B.lclMV_));
01285       for (size_t j=0; j < numVecs; ++j) {
01286         ArrayRCP<Scalar> vj =   getSubArrayRCP( vptr,j),
01287                         avj = A.getSubArrayRCP(avptr,j),
01288                         bvj = B.getSubArrayRCP(bvptr,j);
01289         MVT::initializeValues(b,myLen, 1, bvj, myLen);
01290         MVT::initializeValues(a,myLen, 1, avj, myLen);
01291         MVT::initializeValues(v,myLen, 1,  vj, myLen);
01292         MVT::GESUM(v,alpha,(const KMV&)a,beta,(const KMV&)b,gamma);
01293       }
01294     }
01295   }
01296 
01297 
01298   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01299   Teuchos::ArrayRCP<const Scalar>
01300   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01301   getData (size_t j) const
01302   {
01303     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01304     KOKKOS_NODE_TRACE("MultiVector::getData()")
01305     return node->template viewBuffer<Scalar> (getLocalLength (), getSubArrayRCP (MVT::getValues(lclMV_), j));
01306   }
01307 
01308 
01309   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01310   Teuchos::ArrayRCP<Scalar>
01311   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01312   getDataNonConst(size_t j)
01313   {
01314     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01315     KOKKOS_NODE_TRACE("MultiVector::getDataNonConst()")
01316     return node->template viewBufferNonConst<Scalar> (Kokkos::ReadWrite, getLocalLength (), getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j));
01317   }
01318 
01319 
01320   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01321   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>&
01322   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01323   operator= (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &source)
01324   {
01325     const std::string tfecfFuncName("operator=()");
01326     // Check for special case of this=Source, in which case we do nothing.
01327     if (this != &source) {
01328 #ifdef HAVE_TPETRA_DEBUG
01329       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*source.getMap()), std::invalid_argument,
01330           ": MultiVectors do not have compatible Maps:" << std::endl
01331           << "this->getMap(): " << std::endl << *this->getMap()
01332           << "source.getMap(): " << std::endl << *source.getMap() << std::endl);
01333 #else
01334       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != source.getLocalLength(), std::invalid_argument,
01335           ": MultiVectors do not have the same local length.");
01336 #endif
01337       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(source.getNumVectors() != getNumVectors(), std::invalid_argument,
01338           ": MultiVectors must have the same number of vectors.");
01339       Teuchos::RCP<Node> node = MVT::getNode (lclMV_);
01340       const size_t numVecs = getNumVectors();
01341       if (isConstantStride() && source.isConstantStride() && getLocalLength()==getStride() && source.getLocalLength()==source.getStride()) {
01342         // Both multivectors' data are stored contiguously, so we can
01343         // copy in one call.
01344         KOKKOS_NODE_TRACE("MultiVector::operator=()")
01345         node->template copyBuffers<Scalar>(getLocalLength()*numVecs, MVT::getValues(source.lclMV_), MVT::getValuesNonConst(lclMV_) );
01346       }
01347       else {
01348         // We have to copy the columns one at a time.
01349         for (size_t j=0; j < numVecs; ++j) {
01350           KOKKOS_NODE_TRACE("MultiVector::operator=()")
01351           node->template copyBuffers<Scalar>(getLocalLength(), source.getSubArrayRCP(MVT::getValues(source.lclMV_),j),
01352                                                                       getSubArrayRCP(MVT::getValuesNonConst(lclMV_),j) );
01353         }
01354       }
01355     }
01356     return(*this);
01357   }
01358 
01359 
01360   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01361   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01362   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01363   subCopy (const Teuchos::ArrayView<const size_t> &cols) const
01364   {
01365     using Teuchos::RCP;
01366     using Teuchos::rcp;
01367 
01368     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() < 1, std::runtime_error,
01369         "Tpetra::MultiVector::subCopy(cols): cols must contain at least one column.");
01370     size_t numCopyVecs = cols.size();
01371     const bool zeroData = false;
01372     RCP<Node> node = MVT::getNode(lclMV_);
01373     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
01374     // mv is allocated with constant stride
01375     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
01376     // copy data from *this into mv
01377     for (size_t j=0; j<numCopyVecs; ++j) {
01378       KOKKOS_NODE_TRACE("MultiVector::subCopy()")
01379       node->template copyBuffers<Scalar> (getLocalLength (),
01380                                           getSubArrayRCP (MVT::getValues (lclMV_), cols[j]),
01381                                           MVT::getValuesNonConst (mv->lclMV_, j));
01382     }
01383     return mv;
01384   }
01385 
01386 
01387   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01388   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01389   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01390   subCopy (const Teuchos::Range1D &colRng) const
01391   {
01392     using Teuchos::RCP;
01393     using Teuchos::rcp;
01394 
01395     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
01396         "Tpetra::MultiVector::subCopy(Range1D): range must include at least one vector.");
01397     size_t numCopyVecs = colRng.size();
01398     const bool zeroData = false;
01399     RCP<Node> node = MVT::getNode(lclMV_);
01400     RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > mv;
01401     // mv is allocated with constant stride
01402     mv = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (this->getMap (), numCopyVecs, zeroData));
01403     // copy data from *this into mv
01404     for (size_t js=colRng.lbound(), jd=0; jd<numCopyVecs; ++jd, ++js) {
01405       KOKKOS_NODE_TRACE("MultiVector::subCopy()")
01406       node->template copyBuffers<Scalar> (getLocalLength (),
01407                                           getSubArrayRCP (MVT::getValues (lclMV_), js),
01408                                           MVT::getValuesNonConst (mv->lclMV_, jd));
01409     }
01410     return mv;
01411   }
01412 
01413 
01414   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01415   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01416   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01417   offsetView (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
01418               size_t offset) const
01419   {
01420     using Teuchos::arcp_const_cast;
01421     using Teuchos::ArrayRCP;
01422     using Teuchos::RCP;
01423     using Teuchos::rcp;
01424 
01425     typedef const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> CMV;
01426     TEUCHOS_TEST_FOR_EXCEPTION( subMap->getNodeNumElements() + offset > this->getLocalLength(), std::runtime_error,
01427         "Tpetra::MultiVector::offsetView(subMap,offset): sizes are not sane.\noffset == " << offset << "\nsubMap: " << subMap->description() << "\nthis->rowMap: " << this->getMap()->description());
01428     const size_t numVecs = this->getNumVectors(),
01429                 myStride = MVT::getStride(lclMV_),
01430                   newLen = subMap->getNodeNumElements();
01431     // this is const, so the lclMV_ is const, so that we can only get const buffers
01432     // we will cast away the const; this is okay, because
01433     //   a) the constructor doesn't modify the data, and
01434     //   b) we are encapsulating in a const MV before returning
01435     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
01436     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar> (cbuf);
01437     RCP<CMV> constViewMV;
01438     if (isConstantStride()) {
01439       // view goes from first entry of first vector to last entry of last vector
01440       ArrayRCP<Scalar> subdata = ncbuf.persistingView( offset, myStride * (numVecs-1) + newLen );
01441       constViewMV = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (subMap, subdata, myStride, numVecs, COMPUTE_VIEW_CONSTRUCTOR));
01442     }
01443     else {
01444       // use same which index, but with an offset start pointer
01445       size_t maxSubVecIndex = *std::max_element(whichVectors_.begin(), whichVectors_.end());
01446       ArrayRCP<Scalar> subdata = ncbuf.persistingView( offset, myStride * maxSubVecIndex + newLen );
01447       constViewMV = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (subMap, subdata, myStride, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
01448     }
01449     return constViewMV;
01450   }
01451 
01452 
01453   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01454   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01455   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01456   offsetViewNonConst (const Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Node> >& subMap,
01457                       size_t offset)
01458   {
01459     using Teuchos::arcp_const_cast;
01460     using Teuchos::ArrayRCP;
01461     using Teuchos::RCP;
01462     using Teuchos::rcp;
01463 
01464     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01465     TEUCHOS_TEST_FOR_EXCEPTION( subMap->getNodeNumElements() + offset > this->getLocalLength(), std::runtime_error,
01466       "Tpetra::MultiVector::offsetView(subMap,offset): sizes are not sane.\noffset == "
01467       << offset << "\nsubMap: " << subMap->description() << "\nthis->rowMap: "
01468       << this->getMap()->description());
01469     const size_t numVecs = this->getNumVectors(),
01470                 myStride = MVT::getStride(lclMV_),
01471                   newLen = subMap->getNodeNumElements();
01472     // this is const, so the lclMV_ is const, so that we can only get const buffers
01473     // we will cast away the const; this is okay, because
01474     //   a) the constructor doesn't modify the data, and
01475     //   b) we are encapsulating in a const MV before returning
01476     ArrayRCP<Scalar> buf = MVT::getValuesNonConst(lclMV_);
01477     RCP<MV> subViewMV;
01478     if (isConstantStride()) {
01479       // view goes from first entry of first vector to last entry of last vector
01480       ArrayRCP<Scalar> subdata = buf.persistingView( offset, myStride * (numVecs-1) + newLen );
01481       subViewMV = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (subMap, subdata, myStride, numVecs, COMPUTE_VIEW_CONSTRUCTOR));
01482     }
01483     else {
01484       // use same which index, but with an offset start pointer
01485       size_t maxSubVecIndex = *std::max_element(whichVectors_.begin(), whichVectors_.end());
01486       ArrayRCP<Scalar> subdata = buf.persistingView( offset, myStride * maxSubVecIndex + newLen );
01487       subViewMV = rcp (new MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> (subMap, subdata, myStride, whichVectors_, COMPUTE_VIEW_CONSTRUCTOR));
01488     }
01489     return subViewMV;
01490   }
01491 
01492 
01493   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01494   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01495   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01496   subView (const ArrayView<const size_t> &cols) const
01497   {
01498     using Teuchos::arcp_const_cast;
01499     using Teuchos::Array;
01500     using Teuchos::ArrayRCP;
01501     using Teuchos::RCP;
01502     using Teuchos::rcp;
01503     using Teuchos::rcp_const_cast;
01504     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01505 
01506     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
01507       "Tpetra::MultiVector::subView(ArrayView): range must include at least one vector.");
01508     // this is const, so the lclMV_ is const, so that we can only get const buffers
01509     // we will cast away the const; this is okay, because
01510     //   a) the constructor doesn't modify the data, and
01511     //   b) we are encapsulating in a const MV before returning
01512     const size_t myStride = MVT::getStride(lclMV_),
01513                  myLen    = MVT::getNumRows(lclMV_),
01514               numViewCols = cols.size();
01515     // use the smallest view possible of the buffer: from the first element of the minInd vector to the last element of the maxInd vector
01516     // this minimizes overlap between views, and keeps view of the minimum amount necessary, in order to allow node to achieve maximum efficiency.
01517     // adjust the indices appropriately; shift so that smallest index is 0
01518     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
01519     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar> (cbuf);
01520     Array<size_t> newCols (numViewCols);
01521     size_t minInd = Teuchos::OrdinalTraits<size_t>::max();
01522     size_t maxInd = Teuchos::OrdinalTraits<size_t>::zero();
01523     if (isConstantStride()) {
01524       for (size_t j=0; j < numViewCols; ++j) {
01525         newCols[j] = cols[j];
01526         if (newCols[j] < minInd) minInd = newCols[j];
01527         if (maxInd < newCols[j]) maxInd = newCols[j];
01528       }
01529     }
01530     else {
01531       for (size_t j=0; j < numViewCols; ++j) {
01532         newCols[j] = whichVectors_[cols[j]];
01533         if (newCols[j] < minInd) minInd = newCols[j];
01534         if (maxInd < newCols[j]) maxInd = newCols[j];
01535       }
01536     }
01537     ArrayRCP<Scalar> minbuf = ncbuf.persistingView(minInd * myStride, myStride * (maxInd - minInd) + myLen);
01538     for (size_t j=0; j < numViewCols; ++j) {
01539       newCols[j] -= minInd;
01540     }
01541     RCP<const MV> constViewMV;
01542     return rcp_const_cast<const MV> (rcp (new MV (this->getMap (), minbuf, myStride, newCols (), COMPUTE_VIEW_CONSTRUCTOR)));
01543   }
01544 
01545 
01546   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01547   Teuchos::RCP<const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01548   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01549   subView (const Teuchos::Range1D &colRng) const
01550   {
01551     using Teuchos::arcp_const_cast;
01552     using Teuchos::Array;
01553     using Teuchos::ArrayRCP;
01554     using Teuchos::RCP;
01555     using Teuchos::rcp;
01556     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01557 
01558     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
01559       "Tpetra::MultiVector::subView(Range1D): range must include at least one vector.");
01560     size_t numViewVecs = colRng.size();
01561     // this is const, so the lclMV_ is const, so that we can only get const buffers
01562     // we will cast away the const; this is okay, because
01563     //   a) the constructor doesn't modify the data, and
01564     //   b) we are encapsulating in a const MV before returning
01565     ArrayRCP<const Scalar> cbuf = MVT::getValues(lclMV_);
01566     ArrayRCP<Scalar>      ncbuf = arcp_const_cast<Scalar>(cbuf);
01567     // resulting MultiVector is constant stride only if *this is
01568     if (isConstantStride()) {
01569       // view goes from first entry of first vector to last entry of last vector
01570       ArrayRCP<Scalar> subdata = ncbuf.persistingView( MVT::getStride(lclMV_) * colRng.lbound(),
01571                                                        MVT::getStride(lclMV_) * (numViewVecs-1) + getLocalLength() );
01572       return rcp (new MV (this->getMap (), subdata, MVT::getStride (lclMV_), numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
01573     }
01574     // otherwise, use a subset of this whichVectors_ to construct new multivector
01575     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
01576     return rcp (new MV (this->getMap (), ncbuf, MVT::getStride (lclMV_), whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
01577   }
01578 
01579 
01580   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01581   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01582   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01583   subViewNonConst (const ArrayView<const size_t> &cols)
01584   {
01585     using Teuchos::as;
01586     using Teuchos::arcp_const_cast;
01587     using Teuchos::Array;
01588     using Teuchos::ArrayRCP;
01589     using Teuchos::RCP;
01590     using Teuchos::rcp;
01591     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01592 
01593     TEUCHOS_TEST_FOR_EXCEPTION(cols.size() == 0, std::runtime_error,
01594       "Tpetra::MultiVector::subViewNonConst(ArrayView): range must include at least one vector.");
01595     if (isConstantStride()) {
01596       return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), cols, COMPUTE_VIEW_CONSTRUCTOR));
01597     }
01598     // else, lookup current whichVectors_ using cols
01599     Array<size_t> newcols(cols.size());
01600     for (size_t j = 0; j < as<size_t> (cols.size ()); ++j) {
01601       newcols[j] = whichVectors_[cols[j]];
01602     }
01603     return rcp (new MV (this->getMap (), MVT::getValuesNonConst (lclMV_), MVT::getStride (lclMV_), newcols (), COMPUTE_VIEW_CONSTRUCTOR));
01604   }
01605 
01606 
01607   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01608   Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01609   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01610   subViewNonConst (const Teuchos::Range1D &colRng)
01611   {
01612     using Teuchos::as;
01613     using Teuchos::arcp_const_cast;
01614     using Teuchos::Array;
01615     using Teuchos::ArrayRCP;
01616     using Teuchos::RCP;
01617     using Teuchos::rcp;
01618     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01619 
01620     TEUCHOS_TEST_FOR_EXCEPTION(colRng.size() == 0, std::runtime_error,
01621       "Tpetra::MultiVector::subViewNonConst(Range1D): range must include at least one vector.");
01622     size_t numViewVecs = colRng.size();
01623     // resulting MultiVector is constant stride only if *this is
01624     if (isConstantStride ()) {
01625       // view goes from first entry of first vector to last entry of last vector
01626       const size_t stride = MVT::getStride(lclMV_);
01627       ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
01628       ArrayRCP<Scalar> subdata = data.persistingView( stride * colRng.lbound(),
01629                                                       stride * (numViewVecs-1) + getLocalLength() );
01630       return rcp (new MV (this->getMap (), subdata, stride, numViewVecs, COMPUTE_VIEW_CONSTRUCTOR));
01631     }
01632     // otherwise, use a subset of this whichVectors_ to construct new multivector
01633     Array<size_t> whchvecs( whichVectors_.begin()+colRng.lbound(), whichVectors_.begin()+colRng.ubound()+1 );
01634     const size_t stride = MVT::getStride(lclMV_);
01635     ArrayRCP<Scalar> data = MVT::getValuesNonConst(lclMV_);
01636     return rcp (new MV (this->getMap (), data, stride, whchvecs, COMPUTE_VIEW_CONSTRUCTOR));
01637   }
01638 
01639 
01640   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01641   Teuchos::RCP<const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01642   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01643   getVector (size_t j) const
01644   {
01645     using Teuchos::arcp_const_cast;
01646     using Teuchos::ArrayRCP;
01647     using Teuchos::rcp;
01648     using Teuchos::rcp_const_cast;
01649     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
01650 
01651 #ifdef HAVE_TPETRA_DEBUG
01652     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
01653         "Tpetra::MultiVector::getVector(j): index j (== " << j << ") exceeds valid column range for this multivector.");
01654 #endif
01655     // this is const, so lclMV_ is const, so we get const buff
01656     // it is safe to cast away the const because we will wrap it in a const Vector below
01657     ArrayRCP<Scalar> ncbuff;
01658     if (getLocalLength() > 0) {
01659       ArrayRCP<const Scalar> cbuff = getSubArrayRCP(MVT::getValues(lclMV_),j);
01660       ncbuff = arcp_const_cast<Scalar>(cbuff);
01661     }
01662     return rcp_const_cast<const V> (rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR)));
01663   }
01664 
01665 
01666   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01667   Teuchos::RCP<Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
01668   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getVectorNonConst(size_t j)
01669   {
01670     using Teuchos::ArrayRCP;
01671     using Teuchos::rcp;
01672     typedef Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> V;
01673 
01674 #ifdef HAVE_TPETRA_DEBUG
01675     TEUCHOS_TEST_FOR_EXCEPTION( vectorIndexOutOfRange(j), std::runtime_error,
01676         "Tpetra::MultiVector::getVectorNonConst(j): index j (== " << j << ") exceeds valid column range for this multivector.");
01677 #endif
01678     ArrayRCP<Scalar> ncbuff;
01679     if (getLocalLength() > 0) {
01680       ncbuff = getSubArrayRCP (MVT::getValuesNonConst (lclMV_), j);
01681     }
01682     return rcp (new V (this->getMap (), ncbuff, COMPUTE_VIEW_CONSTRUCTOR));
01683   }
01684 
01685 
01686   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01687   void
01688   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01689   get1dCopy (Teuchos::ArrayView<Scalar> A, size_t LDA) const
01690   {
01691     using Teuchos::ArrayRCP;
01692     using Teuchos::ArrayView;
01693     using Teuchos::RCP;
01694     using Teuchos::rcp;
01695 
01696     const std::string tfecfFuncName("get1dCopy(A,LDA)");
01697     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(LDA < getLocalLength(), std::runtime_error,
01698       ": specified stride is not large enough for local vector length.");
01699     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(A.size()) < LDA*(getNumVectors()-1)+getLocalLength(), std::runtime_error,
01700       ": specified stride/storage is not large enough for the number of vectors.");
01701     RCP<Node> node = MVT::getNode(lclMV_);
01702     const size_t myStride = MVT::getStride(lclMV_),
01703                   numCols = getNumVectors(),
01704                   myLen   = getLocalLength();
01705     if (myLen > 0) {
01706       ArrayRCP<const Scalar> mydata = MVT::getValues(lclMV_);
01707       KOKKOS_NODE_TRACE("MultiVector::getVectorNonConst()")
01708       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,mydata);
01709       typename ArrayView<Scalar>::iterator Aptr = A.begin();
01710       for (size_t j=0; j<numCols; j++) {
01711         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
01712         std::copy(myviewj,myviewj+myLen,Aptr);
01713         Aptr += LDA;
01714       }
01715       myview = Teuchos::null;
01716       mydata = Teuchos::null;
01717     }
01718   }
01719 
01720 
01721   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01722   void
01723   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01724   get2dCopy (Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> > ArrayOfPtrs) const
01725   {
01726     using Teuchos::as;
01727     using Teuchos::ArrayRCP;
01728     using Teuchos::ArrayView;
01729     using Teuchos::RCP;
01730     using Teuchos::rcp;
01731 
01732     const std::string tfecfFuncName("get2dCopy(ArrayOfPtrs)");
01733     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(Teuchos::as<size_t>(ArrayOfPtrs.size()) != getNumVectors(), std::runtime_error,
01734         ": Array of pointers must contain as many pointers as the MultiVector has rows.");
01735     RCP<Node> node = MVT::getNode(lclMV_);
01736     const size_t numCols = getNumVectors(),
01737                    myLen = getLocalLength();
01738     if (myLen > 0) {
01739       ArrayRCP<const Scalar> mybuff = MVT::getValues(lclMV_);
01740       KOKKOS_NODE_TRACE("MultiVector::get2dCopy()")
01741       ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(mybuff.size(), mybuff);
01742       for (size_t j=0; j<numCols; ++j) {
01743 #ifdef HAVE_TPETRA_DEBUG
01744         TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(as<size_t>(ArrayOfPtrs[j].size()) != getLocalLength(), std::runtime_error,
01745           ": The ArrayView provided in ArrayOfPtrs[" << j << "] was not large enough to contain the local entries.");
01746 #endif
01747         ArrayRCP<const Scalar> myviewj = getSubArrayRCP(myview,j);
01748         std::copy(myviewj,myviewj+myLen,ArrayOfPtrs[j].begin());
01749       }
01750       myview = Teuchos::null;
01751       mybuff = Teuchos::null;
01752     }
01753   }
01754 
01755 
01756   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01757   Teuchos::ArrayRCP<const Scalar>
01758   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dView () const
01759   {
01760     TEUCHOS_TEST_FOR_EXCEPTION(!isConstantStride(), std::runtime_error,
01761       "Tpetra::MultiVector::get1dView() requires that this MultiVector have constant stride.");
01762     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01763     KOKKOS_NODE_TRACE("MultiVector::get1dView()")
01764     return node->template viewBuffer<Scalar>( getStride()*(getNumVectors()-1)+getLocalLength(), MVT::getValues(lclMV_) );
01765   }
01766 
01767 
01768   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01769   Teuchos::ArrayRCP<Scalar>
01770   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get1dViewNonConst ()
01771   {
01772     TEUCHOS_TEST_FOR_EXCEPTION(!isConstantStride(), std::runtime_error,
01773       "Tpetra::MultiVector::get1dViewNonConst(): requires that this MultiVector have constant stride.");
01774     Teuchos::RCP<Node> node = MVT::getNode(lclMV_);
01775     KOKKOS_NODE_TRACE("MultiVector::get1dViewNonConst()")
01776     return node->template viewBufferNonConst<Scalar>( Kokkos::ReadWrite, getStride()*(getNumVectors()-1)+getLocalLength(), MVT::getValuesNonConst(lclMV_) );
01777   }
01778 
01779 
01780   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01781   Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
01782   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dViewNonConst()
01783   {
01784     using Teuchos::arcp;
01785     using Teuchos::ArrayRCP;
01786     using Teuchos::RCP;
01787 
01788     RCP<Node> node = MVT::getNode(lclMV_);
01789     ArrayRCP<ArrayRCP<Scalar> > views = arcp<ArrayRCP<Scalar> > (getNumVectors ());
01790     if (isConstantStride ()) {
01791       const size_t myStride = MVT::getStride(lclMV_),
01792                     numCols = getNumVectors(),
01793                     myLen   = getLocalLength();
01794       if (myLen > 0) {
01795         KOKKOS_NODE_TRACE("MultiVector::get2dViewNonConst()")
01796         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(Kokkos::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
01797         for (size_t j=0; j<numCols; ++j) {
01798           views[j] = myview.persistingView(0,myLen);
01799           myview += myStride;
01800         }
01801       }
01802     }
01803     else {
01804       const size_t myStride = MVT::getStride(lclMV_),
01805                     numCols = MVT::getNumCols(lclMV_),
01806                      myCols = getNumVectors(),
01807                      myLen  = MVT::getNumRows(lclMV_);
01808       if (myLen > 0) {
01809         KOKKOS_NODE_TRACE("MultiVector::get2dViewNonConst()")
01810         ArrayRCP<Scalar> myview = node->template viewBufferNonConst<Scalar>(Kokkos::ReadWrite,myStride*(numCols-1)+myLen,MVT::getValuesNonConst(lclMV_));
01811         for (size_t j=0; j<myCols; ++j) {
01812           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
01813         }
01814       }
01815     }
01816     return views;
01817   }
01818 
01819 
01820   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01821   Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
01822   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::get2dView() const
01823   {
01824     using Teuchos::arcp;
01825     using Teuchos::ArrayRCP;
01826     using Teuchos::RCP;
01827 
01828     RCP<Node> node = MVT::getNode(lclMV_);
01829     ArrayRCP< ArrayRCP<const Scalar> > views = arcp<ArrayRCP<const Scalar> >(getNumVectors());
01830     if (isConstantStride()) {
01831       const size_t myStride = MVT::getStride(lclMV_),
01832                     numCols = getNumVectors(),
01833                     myLen   = getLocalLength();
01834       if (myLen > 0) {
01835         KOKKOS_NODE_TRACE("MultiVector::get2dView()")
01836         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
01837         for (size_t j=0; j<numCols; ++j) {
01838           views[j] = myview.persistingView(0,myLen);
01839           myview += myStride;
01840         }
01841       }
01842     }
01843     else {
01844       const size_t myStride = MVT::getStride(lclMV_),
01845                     numCols = MVT::getNumCols(lclMV_),
01846                      myCols = getNumVectors(),
01847                      myLen  = MVT::getNumRows(lclMV_);
01848       if (myLen > 0) {
01849         KOKKOS_NODE_TRACE("MultiVector::get2dView()")
01850         ArrayRCP<const Scalar> myview = node->template viewBuffer<Scalar>(myStride*(numCols-1)+myLen,MVT::getValues(lclMV_));
01851         for (size_t j=0; j<myCols; ++j) {
01852           views[j] = myview.persistingView(whichVectors_[j]*myStride,myLen);
01853         }
01854       }
01855     }
01856     return views;
01857   }
01858 
01859 
01860   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01861   void
01862   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01863   multiply (Teuchos::ETransp transA,
01864             Teuchos::ETransp transB,
01865             const Scalar &alpha,
01866             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
01867             const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
01868             const Scalar &beta)
01869   {
01870     using Teuchos::NO_TRANS;      // enums
01871     using Teuchos::TRANS;
01872     using Teuchos::CONJ_TRANS;
01873     using Teuchos::null;
01874     using Teuchos::ScalarTraits;  // traits
01875     using Teuchos::as;
01876     using Teuchos::RCP;
01877     using Teuchos::rcp;
01878     typedef MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
01879 
01880     // This routine performs a variety of matrix-matrix multiply operations, interpreting
01881     // the MultiVector (this-aka C , A and B) as 2D matrices.  Variations are due to
01882     // the fact that A, B and C can be local replicated or global distributed
01883     // MultiVectors and that we may or may not operate with the transpose of
01884     // A and B.  Possible cases are:
01885     //                                       Num
01886     //      OPERATIONS                        cases  Notes
01887     //  1) C(local) = A^X(local) * B^X(local)  4    (X=Trans or Not, No comm needed)
01888     //  2) C(local) = A^T(distr) * B  (distr)  1    (2D dot product, replicate C)
01889     //  3) C(distr) = A  (distr) * B^X(local)  2    (2D vector update, no comm needed)
01890     //
01891     // The following operations are not meaningful for 1D distributions:
01892     //
01893     // u1) C(local) = A^T(distr) * B^T(distr)  1
01894     // u2) C(local) = A  (distr) * B^X(distr)  2
01895     // u3) C(distr) = A^X(local) * B^X(local)  4
01896     // u4) C(distr) = A^X(local) * B^X(distr)  4
01897     // u5) C(distr) = A^T(distr) * B^X(local)  2
01898     // u6) C(local) = A^X(distr) * B^X(local)  4
01899     // u7) C(distr) = A^X(distr) * B^X(local)  4
01900     // u8) C(local) = A^X(local) * B^X(distr)  4
01901     //
01902     // Total of 32 case (2^5).
01903 
01904     const std::string errPrefix("Tpetra::MultiVector::multiply(transOpA,transOpB,alpha,A,B,beta): ");
01905 
01906     TEUCHOS_TEST_FOR_EXCEPTION( ScalarTraits<Scalar>::isComplex && (transA == TRANS || transB == TRANS), std::invalid_argument,
01907         errPrefix << "non-conjugate transpose not supported for complex types.");
01908     transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS);
01909     transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS);
01910 
01911     // Compute effective dimensions, w.r.t. transpose operations on
01912     size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength();
01913     size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors();
01914     size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength();
01915     size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors();
01916 
01917     Scalar beta_local = beta; // local copy of beta; might be reassigned below
01918 
01919     TEUCHOS_TEST_FOR_EXCEPTION( getLocalLength() != A_nrows || getNumVectors() != B_ncols || A_ncols != B_nrows, std::runtime_error,
01920         errPrefix << "dimension of *this, op(A) and op(B) must be consistent.");
01921 
01922     bool A_is_local = !A.isDistributed();
01923     bool B_is_local = !B.isDistributed();
01924     bool C_is_local = !this->isDistributed();
01925     bool Case1 = ( C_is_local &&  A_is_local &&  B_is_local);                                           // Case 1: C(local) = A^X(local) * B^X(local)
01926     bool Case2 = ( C_is_local && !A_is_local && !B_is_local && transA==CONJ_TRANS && transB==NO_TRANS); // Case 2: C(local) = A^T(distr) * B  (distr)
01927     bool Case3 = (!C_is_local && !A_is_local &&  B_is_local && transA==NO_TRANS  );                     // Case 3: C(distr) = A  (distr) * B^X(local)
01928 
01929     // Test that we are considering a meaningful cases
01930     TEUCHOS_TEST_FOR_EXCEPTION( !Case1 && !Case2 && !Case3, std::runtime_error,
01931         errPrefix << "multiplication of op(A) and op(B) into *this is not a supported use case.");
01932 
01933     if (beta != ScalarTraits<Scalar>::zero() && Case2)
01934     {
01935       // if Case2, then C is local and contributions must be summed across all nodes
01936       // however, if beta != 0, then accumulate beta*C into the sum
01937       // when summing across all nodes, we only want to accumulate this once, so
01938       // set beta == 0 on all nodes except node 0
01939       int MyPID = this->getMap()->getComm()->getRank();
01940       if (MyPID!=0) beta_local = ScalarTraits<Scalar>::zero();
01941     }
01942 
01943     // Check if A, B, C have constant stride, if not then make temp copy (strided)
01944     RCP<const MV> Atmp, Btmp;
01945     RCP<MV>       Ctmp;
01946     if (isConstantStride() == false) Ctmp = rcp (new MV (*this));
01947     else Ctmp = rcp(this,false);
01948 
01949     if (A.isConstantStride() == false) Atmp = rcp (new MV (A));
01950     else Atmp = rcp(&A,false);
01951 
01952     if (B.isConstantStride() == false) Btmp = rcp (new MV (B));
01953     else Btmp = rcp(&B,false);
01954 
01955 #ifdef HAVE_TEUCHOS_DEBUG
01956     TEUCHOS_TEST_FOR_EXCEPTION(!Ctmp->isConstantStride() || !Btmp->isConstantStride() || !Atmp->isConstantStride(), std::logic_error,
01957         errPrefix << "failed making temporary strided copies of input multivectors.");
01958 #endif
01959 
01960     KMV &C_mv = Ctmp->lclMV_;
01961     {
01962       // get local multivectors
01963       const KMV &A_mv = Atmp->lclMV_;
01964       const KMV &B_mv = Btmp->lclMV_;
01965       // do the multiply (GEMM)
01966       MVT::GEMM(C_mv,transA,transB,alpha,A_mv,B_mv,beta_local);
01967     }
01968 
01969     // Dispose of (possibly) extra copies of A, B
01970     Atmp = null;
01971     Btmp = null;
01972 
01973     RCP<Node> node = MVT::getNode(lclMV_);
01974     // If *this was not strided, copy the data from the strided version and then delete it
01975     if (! isConstantStride ()) {
01976       // *this is not strided, we must put data from Ctmp into *this
01977       TEUCHOS_TEST_FOR_EXCEPT(&C_mv != &lclMV_);
01978       const size_t numVecs = MVT::getNumCols(lclMV_);
01979       for (size_t j=0; j < numVecs; ++j) {
01980         KOKKOS_NODE_TRACE("MultiVector::multiply()")
01981         node->template copyBuffers<Scalar>(getLocalLength(),MVT::getValues(C_mv,j),MVT::getValuesNonConst(lclMV_,whichVectors_[j]));
01982       }
01983     }
01984 
01985     // If Case 2 then sum up *this and distribute it to all processors.
01986     if (Case2) {
01987       this->reduce();
01988     }
01989   }
01990 
01991   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
01992   void
01993   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
01994   elementWiseMultiply (Scalar scalarAB,
01995                        const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
01996                        const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B,
01997                        Scalar scalarThis)
01998   {
01999     using Teuchos::arcp_const_cast;
02000 
02001     const std::string tfecfFuncName("elementWiseMultiply()");
02002 
02003 #ifdef HAVE_TPETRA_DEBUG
02004     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( !this->getMap()->isCompatible(*A.getMap()) || !this->getMap()->isCompatible(*B.getMap()), std::runtime_error,
02005       ": MultiVectors do not have compatible Maps:" << std::endl
02006       << "this->getMap(): " << std::endl << *this->getMap()
02007       << "A.getMap(): " << std::endl << *A.getMap() << std::endl
02008       << "B.getMap(): " << std::endl << *B.getMap() << std::endl);
02009 #else
02010     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( getLocalLength() != A.getLocalLength() || getLocalLength() != B.getLocalLength(), std::runtime_error,
02011         ": MultiVectors do not have the same local length.");
02012 #endif
02013     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(B.getNumVectors() != this->getNumVectors(), std::runtime_error,
02014         ": MultiVectors 'this' and B must have the same number of vectors.");
02015     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(A.getNumVectors() != 1, std::runtime_error,
02016         ": MultiVectors A must have just 1 vector.");
02017     try {
02018       MVT::ElemMult(lclMV_,scalarThis,scalarAB,(const KMV &)A.lclMV_,(const KMV &)B.lclMV_);
02019     }
02020     catch (std::runtime_error &e) {
02021       TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true,std::runtime_error,
02022           ": caught exception from Kokkos:" << std::endl
02023           << e.what() << std::endl);
02024     }
02025   }
02026 
02027   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02028   void
02029   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::reduce()
02030   {
02031     using Teuchos::Array;
02032     using Teuchos::ArrayView;
02033     using Teuchos::ArrayRCP;
02034     using Teuchos::as;
02035     using Teuchos::RCP;
02036     using Teuchos::reduceAll;
02037     using Teuchos::REDUCE_SUM;
02038 
02039     // This method should be called only for locally replicated
02040     // MultiVectors (! isDistributed ()).
02041     TEUCHOS_TEST_FOR_EXCEPTION(this->isDistributed (), std::runtime_error,
02042       "Tpetra::MultiVector::reduce() should only be called with locally "
02043       "replicated or otherwise not distributed MultiVector objects.");
02044     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02045     if (comm->getSize() == 1) return;
02046     RCP<Node> node = MVT::getNode(lclMV_);
02047     // sum the data across all multivectors
02048     // need to have separate packed buffers for send and receive
02049     // if we're packed, we'll just set the receive buffer as our data, the send as a copy
02050     // if we're not packed, we'll use allocated buffers for both.
02051     ArrayView<Scalar> target;
02052     const size_t myStride = MVT::getStride(lclMV_),
02053                  numCols  = MVT::getNumCols(lclMV_),
02054                  myLen    = MVT::getNumRows(lclMV_);
02055     Array<Scalar> sourceBuffer(numCols*myLen), tmparr(0);
02056     bool packed = isConstantStride() && (myStride == myLen);
02057     KOKKOS_NODE_TRACE("MultiVector::reduce()")
02058     ArrayRCP<Scalar> bufView = node->template viewBufferNonConst<Scalar>(
02059                                           Kokkos::ReadWrite,myStride*(numCols-1)+myLen,
02060                                           MVT::getValuesNonConst(lclMV_) );
02061     if (packed) {
02062       // copy data from view to sourceBuffer, reduce into view below
02063       target = bufView(0,myLen*numCols);
02064       std::copy(target.begin(),target.end(),sourceBuffer.begin());
02065     }
02066     else {
02067       // copy data into sourceBuffer, reduce from sourceBuffer into tmparr below, copy back to view after that
02068       tmparr.resize(myLen*numCols);
02069       Scalar *sptr = sourceBuffer.getRawPtr();
02070       ArrayRCP<const Scalar> vptr = bufView;
02071       for (size_t j=0; j<numCols; ++j) {
02072         std::copy(vptr,vptr+myLen,sptr);
02073         sptr += myLen;
02074         vptr += myStride;
02075       }
02076       target = tmparr();
02077     }
02078     // reduce
02079     reduceAll<int,Scalar> (*comm, REDUCE_SUM, as<int> (numCols*myLen),
02080                            sourceBuffer.getRawPtr (), target.getRawPtr ());
02081     if (! packed) {
02082       // copy tmparr back into view
02083       const Scalar *sptr = tmparr.getRawPtr();
02084       ArrayRCP<Scalar> vptr = bufView;
02085       for (size_t j=0; j<numCols; ++j) {
02086         std::copy (sptr, sptr+myLen, vptr);
02087         sptr += myLen;
02088         vptr += myStride;
02089       }
02090     }
02091     bufView = Teuchos::null;
02092   }
02093 
02094 
02095   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02096   void
02097   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02098   replaceLocalValue (LocalOrdinal MyRow,
02099                      size_t VectorIndex,
02100                      const Scalar &ScalarValue)
02101   {
02102 #ifdef HAVE_TPETRA_DEBUG
02103     const std::string tfecfFuncName("replaceLocalValue()");
02104     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(MyRow < this->getMap()->getMinLocalIndex() || MyRow > this->getMap()->getMaxLocalIndex(),
02105       std::runtime_error, ": row index is invalid.");
02106     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( vectorIndexOutOfRange(VectorIndex),
02107       std::runtime_error, ": vector index is invalid.");
02108 #endif
02109     getDataNonConst(VectorIndex)[MyRow] = ScalarValue;
02110   }
02111 
02112 
02113   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02114   void
02115   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02116   sumIntoLocalValue (LocalOrdinal MyRow,
02117                      size_t VectorIndex,
02118                      const Scalar &ScalarValue)
02119   {
02120 #ifdef HAVE_TPETRA_DEBUG
02121     const std::string tfecfFuncName("sumIntoLocalValue");
02122     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(MyRow < this->getMap()->getMinLocalIndex() || MyRow > this->getMap()->getMaxLocalIndex(), std::runtime_error,
02123       ": row index is invalid.");
02124     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( vectorIndexOutOfRange(VectorIndex), std::runtime_error,
02125       ": vector index is invalid.");
02126 #endif
02127     getDataNonConst(VectorIndex)[MyRow] += ScalarValue;
02128   }
02129 
02130 
02131   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02132   void
02133   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02134   replaceGlobalValue (GlobalOrdinal GlobalRow, size_t VectorIndex, const Scalar &ScalarValue)
02135   {
02136     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02137 #ifdef HAVE_TPETRA_DEBUG
02138     const std::string tfecfFuncName("replaceGlobalValue()");
02139     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), std::runtime_error,
02140         ": row index is not present on this processor.");
02141     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( vectorIndexOutOfRange(VectorIndex), std::runtime_error,
02142         ": vector index is invalid.");
02143 #endif
02144     getDataNonConst(VectorIndex)[MyRow] = ScalarValue;
02145   }
02146 
02147   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02148   void MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::sumIntoGlobalValue(GlobalOrdinal GlobalRow, size_t VectorIndex, const Scalar &ScalarValue)
02149   {
02150     LocalOrdinal MyRow = this->getMap()->getLocalElement(GlobalRow);
02151 #ifdef HAVE_TEUCHOS_DEBUG
02152     const std::string tfecfFuncName("sumIntoGlobalValue()");
02153     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid(), std::runtime_error,
02154       ": row index is not present on this processor.");
02155     TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( vectorIndexOutOfRange(VectorIndex), std::runtime_error,
02156       ": vector index is invalid.");
02157 #endif
02158     getDataNonConst(VectorIndex)[MyRow] += ScalarValue;
02159   }
02160 
02161   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02162   template <class T>
02163   Teuchos::ArrayRCP<T>
02164   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02165   getSubArrayRCP (Teuchos::ArrayRCP<T> arr,
02166                   size_t j) const
02167   {
02168     const size_t stride = MVT::getStride (lclMV_);
02169     const size_t myLen = getLocalLength ();
02170     Teuchos::ArrayRCP<T> ret;
02171     if (isConstantStride()) {
02172       ret = arr.persistingView (j*stride, myLen);
02173     }
02174     else {
02175       ret = arr.persistingView (whichVectors_[j]*stride, myLen);
02176     }
02177     return ret;
02178   }
02179 
02180   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02181   const Kokkos::MultiVector<Scalar,Node>&
02182   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMV() const {
02183     return lclMV_;
02184   }
02185 
02186   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02187   Kokkos::MultiVector<Scalar,Node>&
02188   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getLocalMVNonConst() {
02189     return lclMV_;
02190   }
02191 
02192   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02193   std::string
02194   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::description() const
02195   {
02196     std::ostringstream oss;
02197     oss << Teuchos::Describable::description();
02198     oss << "{length = "<<getGlobalLength()
02199         << ", getNumVectors = "<<getNumVectors()
02200         << ", isConstantStride() = "<<isConstantStride()
02201         << "}";
02202     return oss.str();
02203   }
02204 
02205   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02206   void
02207   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02208   describe (Teuchos::FancyOStream &out,
02209             const Teuchos::EVerbosityLevel verbLevel) const
02210   {
02211     using Teuchos::ArrayRCP;
02212     using Teuchos::RCP;
02213     using Teuchos::VERB_DEFAULT;
02214     using Teuchos::VERB_NONE;
02215     using Teuchos::VERB_LOW;
02216     using Teuchos::VERB_MEDIUM;
02217     using Teuchos::VERB_HIGH;
02218     using Teuchos::VERB_EXTREME;
02219     using std::endl;
02220     using std::setw;
02221 
02222     // Set default verbosity if applicable.
02223     const Teuchos::EVerbosityLevel vl =
02224       (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
02225 
02226     RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
02227     const int myImageID = comm->getRank();
02228     const int numImages = comm->getSize();
02229 
02230     if (vl != VERB_NONE) {
02231       // Don't set the tab level unless we're printing something.
02232       Teuchos::OSTab tab (out);
02233 
02234       if (myImageID == 0) { // >= VERB_LOW prints description()
02235         out << this->description() << endl;
02236       }
02237       for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
02238         if (myImageID == imageCtr) {
02239           if (vl != VERB_LOW) {
02240             // At verbosity > VERB_LOW, each process prints something.
02241             out << "Process " << myImageID << ":" << endl;
02242 
02243             Teuchos::OSTab procTab (out);
02244             // >= VERB_MEDIUM: print the local vector length.
02245             out << "local length=" << getLocalLength();
02246             if (vl != VERB_MEDIUM) {
02247               // >= VERB_HIGH: print isConstantStride() and getStride()
02248               if (isConstantStride()) {
02249                 out << ", constant stride=" << getStride() << endl;
02250               }
02251               else {
02252                 out << ", not constant stride" << endl;
02253               }
02254               if (vl == VERB_EXTREME) {
02255                 // VERB_EXTREME: print all the values in the multivector.
02256                 out << "Values:" << endl;
02257                 ArrayRCP<ArrayRCP<const Scalar> > X = this->get2dView();
02258                 for (size_t i = 0; i < getLocalLength(); ++i) {
02259                   for (size_t j = 0; j < getNumVectors(); ++j) {
02260                     out << X[j][i];
02261                     if (j < getNumVectors() - 1) {
02262                       out << " ";
02263                     }
02264                   } // for each column
02265                   out << endl;
02266                 } // for each row
02267               } // if vl == VERB_EXTREME
02268             } // if (vl != VERB_MEDIUM)
02269             else { // vl == VERB_LOW
02270               out << endl;
02271             }
02272           } // if vl != VERB_LOW
02273         } // if it is my process' turn to print
02274         comm->barrier();
02275       } // for each process in the communicator
02276     } // if vl != VERB_NONE
02277   }
02278 
02279   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02280   void
02281   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02282   createViews() const
02283   {
02284     if (! createViewsRaisedEfficiencyWarning_) {
02285       TPETRA_EFFICIENCY_WARNING(! cview_.is_null (), std::runtime_error,
02286         "::createViews(): The const view "
02287         "has already been created and is therefore not null.  (For"
02288         "Tpetra developers: cview_.total_count() = " << cview_.total_count ()
02289         << ".  This "
02290         "means that MultiVector is either creating a view unnecessarily, or "
02291         "hanging on to a view beyond its needed scope (since releaseViews() "
02292         "should always release both the const and nonconst views).  This probably "
02293         "does not affect correctness, it but does affect total memory use.  "
02294         "We will only report this warning once per (Multi)Vector instance.  "
02295         "Please report this performance bug to the Tpetra developers.");
02296       createViewsRaisedEfficiencyWarning_ = true;
02297     }
02298 
02299     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
02300     if (cview_.is_null () && getLocalLength () > 0) {
02301       Teuchos::ArrayRCP<const Scalar> buff = MVT::getValues (lclMV_);
02302       KOKKOS_NODE_TRACE("MultiVector::createViews()")
02303       cview_ = node->template viewBuffer<Scalar> (buff.size (), buff);
02304     }
02305   }
02306 
02307   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02308   void
02309   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
02310   createViewsNonConst (Kokkos::ReadWriteOption rwo)
02311   {
02312     if (! createViewsNonConstRaisedEfficiencyWarning_) {
02313       TPETRA_EFFICIENCY_WARNING(! ncview_.is_null (), std::runtime_error,
02314         "::createViewsNonConst(): The nonconst view "
02315         "has already been created and is therefore not null.  (For"
02316         "Tpetra developers: ncview_.total_count() = " << ncview_.total_count ()
02317         << ".  This means that MultiVector is either creating a view "
02318         "unnecessarily, or hanging on to a view beyond its needed scope (since "
02319         "releaseViews() should always release both the const and nonconst "
02320         "views).  This probably does not affect correctness, it but does affect "
02321         "total memory use.  "
02322         "Please report this performance bug to the Tpetra developers.");
02323       createViewsNonConstRaisedEfficiencyWarning_ = true;
02324     }
02325 
02326     Teuchos::RCP<Node> node = this->getMap ()->getNode ();
02327     if (ncview_.is_null () && getLocalLength () > 0) {
02328       Teuchos::ArrayRCP<Scalar> buff = MVT::getValuesNonConst (lclMV_);
02329       KOKKOS_NODE_TRACE("MultiVector::createViewsNonConst()")
02330       ncview_ = node->template viewBufferNonConst<Scalar> (rwo, buff.size (), buff);
02331     }
02332   }
02333 
02334   template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
02335   void
02336   MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::releaseViews () const
02337   {
02338     const int constViewCount = cview_.total_count ();
02339     const int nonconstViewCount = ncview_.total_count ();
02340 
02341     // This prevents unused variable compiler warnings, in case Tpetra
02342     // efficiency warnings aren't enabled.
02343     (void) constViewCount;
02344     (void) nonconstViewCount;
02345 
02346     const bool viewWontGetReleased = constViewCount > 1 || nonconstViewCount > 1;
02347     if (viewWontGetReleased && ! releaseViewsRaisedEfficiencyWarning_) {
02348       const bool both = constViewCount > 1 && nonconstViewCount > 1;
02349       const char* const text = both ? "Both the const view and the nonconst view have" :
02350         ((constViewCount > 1) ? "The const view has" : "The nonconst view has");
02351       // Prevent (unused variable) compiler warning, since the macro
02352       // below doesn't exist unless efficiency warnings are enabled.
02353       (void) text;
02354 
02355       TPETRA_EFFICIENCY_WARNING(viewWontGetReleased, std::runtime_error,
02356         "::releaseViews(): " << text << " a reference count greater than 1.  "
02357         "For Tpetra developers: cview_.total_count() = " << constViewCount
02358         << " and ncview_.total_count() = " << nonconstViewCount << ".  This "
02359         "means that releaseViews() won't actually free memory.  This probably "
02360         "does not affect correctness, it but does affect total memory use.  "
02361         "We will only report this warning once per (Multi)Vector instance.  "
02362         "Please report this performance bug to the Tpetra developers.");
02363       releaseViewsRaisedEfficiencyWarning_ = true;
02364     }
02365 
02366     // Release the views.
02367     cview_ = Teuchos::null;
02368     ncview_ = Teuchos::null;
02369   }
02370 } // namespace Tpetra
02371 
02372 //
02373 // Explicit instantiation macro
02374 //
02375 // Must be expanded from within the Tpetra namespace!
02376 //
02377 
02378 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
02379   \
02380   template class MultiVector< SCALAR , LO , GO , NODE >; \
02381 
02382 
02383 #endif // TPETRA_MULTIVECTOR_DEF_HPP
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines