Tpetra Matrix/Vector Services Version of the Day
RTIOperatorExample.cpp
Go to the documentation of this file.
00001 #include <Teuchos_GlobalMPISession.hpp>
00002 #include <Teuchos_oblackholestream.hpp>
00003 #include <Teuchos_CommandLineProcessor.hpp>
00004 #include <Teuchos_Tuple.hpp>
00005 #include <Teuchos_TimeMonitor.hpp>
00006 
00007 #include <Kokkos_DefaultNode.hpp>
00008 
00009 #include "Tpetra_Version.hpp"
00010 #include "Tpetra_DefaultPlatform.hpp"
00011 #include "Tpetra_MultiVector.hpp"
00012 #include "Tpetra_CrsMatrix.hpp"
00013 #include "Tpetra_RTIOp.hpp"
00014 
00015 #include <iostream>
00016 #include <iomanip>
00017 #include <functional>
00018 
00024 namespace TpetraExamples {
00025 
00027   template <class S>
00028   class FDStencil : public Tpetra::RTI::detail::StdOpKernel<S> 
00029   {
00030     protected:
00031       int _myImageID, _numImages;
00032       int _n;
00033       S _sub, _diag, _super;
00034     public:
00035       FDStencil(int myImageID, int numImages, int n, const S &sub, const S &diag, const S &super) : _myImageID(myImageID), _numImages(numImages), _n(n), _sub(sub), _diag(diag), _super(super) {}
00036       inline void execute(int i) const
00037       { 
00038         S res = _diag * this->_vec_in2[i];
00039         if (i >  0 || _myImageID != 0           ) res +=   _sub * this->_vec_in2[i-1];
00040         if (i < _n || _myImageID != _numImages-1) res += _super * this->_vec_in2[i+1];
00041         if (this->_alpha == Teuchos::ScalarTraits<S>::one() && this->_beta == Teuchos::ScalarTraits<S>::zero()) {
00042           this->_vec_inout[i] = res;
00043         }
00044         else {
00045           this->_vec_inout[i] = this->_alpha * res + this->_beta * this->_vec_inout[i];
00046         }
00047       }
00048   };
00049 
00051   template <class S>
00052   class ScaleKernel : public Tpetra::RTI::detail::StdOpKernel<S>
00053   {
00054     protected:
00055       S _gamma;
00056     public:
00057       ScaleKernel(const S & gamma) : _gamma(gamma) {}
00058       inline void execute(int i) { 
00059         if (this->_alpha == Teuchos::ScalarTraits<S>::one() && this->_beta == Teuchos::ScalarTraits<S>::zero()) {
00060           this->_vec_inout[i] = _gamma * this->_vec_in2[i];
00061         }
00062         else {
00063           this->_vec_inout[i] = this->_alpha * _gamma * this->_vec_in2[i] + this->_beta * this->_vec_inout[i];
00064         }
00065       }
00066   };
00067 
00068 }
00069 
00070 int main(int argc, char *argv[])
00071 {
00072   Teuchos::oblackholestream blackhole;
00073   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
00074 
00075   using Teuchos::TimeMonitor;
00076   using TpetraExamples::FDStencil;
00077   using TpetraExamples::ScaleKernel;
00078 
00079   // 
00080   // Get the default communicator and node
00081   //
00082   auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
00083   auto comm = platform.getComm();
00084   auto node = platform.getNode();
00085   const int myImageID = comm->getRank();
00086   const int numImages = comm->getSize();
00087 
00088   //
00089   // Get example parameters from command-line processor
00090   //  
00091   bool verbose = (myImageID==0);
00092   int numGlobal_user = 100*comm->getSize();
00093   int numTimeTrials = 3;
00094   Teuchos::CommandLineProcessor cmdp(false,true);
00095   cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
00096   cmdp.setOption("global-size",&numGlobal_user,"Global test size.");
00097   cmdp.setOption("num-time-trials",&numTimeTrials,"Number of trials in timing loops.");
00098   if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
00099     return -1;
00100   }
00101 
00102   // 
00103   // Say hello, print some communicator info
00104   //
00105   if (verbose) {
00106     std::cout << "\n" << Tpetra::version() << std::endl;
00107     std::cout << "Comm info: " << *comm;
00108     std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
00109     std::cout << std::endl;
00110   }
00111 
00112   // 
00113   // Create a simple map for domain and range
00114   // 
00115   Tpetra::global_size_t numGlobalRows = numGlobal_user;
00116   auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
00117   // const size_t numLocalRows = map->getNodeNumElements();
00118   auto x = Tpetra::createVector<double>(map),
00119        y = Tpetra::createVector<double>(map);
00120 
00121   // Create a simple diagonal operator using lambda function
00122   auto fTwoOp = Tpetra::RTI::binaryOp<double>( [](double /*y*/, double x) { return 2.0 * x; } , map );
00123   // y = 3*fTwoOp*x + 2*y = 3*2*1 + 2*1 = 8
00124   x->putScalar(1.0);
00125   y->putScalar(1.0);
00126   fTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 3.0, 2.0 );
00127   // check that y == eights
00128   double norm = y->norm1();
00129   if (verbose) {
00130     std::cout << "Tpetra::RTI::binaryOp" << std::endl
00131               << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
00132               << ", expected value is " << numGlobalRows * 8.0 << std::endl;
00133   }
00134 
00135   // Create the same diagonal operator using a Kokkos kernel
00136   auto kTwoOp = Tpetra::RTI::kernelOp<double>( ScaleKernel<double>(2.0), map );
00137   // y = 0.5*kTwop*x + 0.75*y = 0.5*2*1 + 0.75*8 = 7
00138   kTwoOp->apply( *x, *y, Teuchos::NO_TRANS, 0.5, 0.75 );
00139   // check that y == sevens
00140   norm = y->norm1();
00141   if (verbose) {
00142     std::cout << "Tpetra::RTI::kernelOp" << std::endl
00143               << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
00144               << ", expected value is " << numGlobalRows * 7.0 << std::endl;
00145   }
00146 
00147   //
00148   // Create a finite-difference stencil using a Kokkos kernel and non-trivial maps
00149   //
00150   decltype(map) colmap;
00151   if (numImages > 1) {
00152     Teuchos::Array<int>           colElements;
00153     Teuchos::ArrayView<const int> rowElements = map->getNodeElementList();
00154     // This isn't the most efficient Map/Import layout, but it makes for a very straight-forward kernel
00155     if (myImageID != 0) colElements.push_back( map->getMinGlobalIndex() - 1 );
00156     colElements.insert(colElements.end(), rowElements.begin(), rowElements.end());
00157     if (myImageID != numImages-1) colElements.push_back( map->getMaxGlobalIndex() + 1 );
00158     colmap = Tpetra::createNonContigMapWithNode<int,int>(colElements(), comm, node);
00159   }
00160   else {
00161     colmap = map;
00162   }
00163   auto importer = createImport(map,colmap);
00164   // Finite-difference kernel = tridiag(-1, 2, -1)
00165   FDStencil<double> kern(myImageID, numImages, map->getNodeNumElements(), -1.0, 2.0, -1.0);
00166   auto FDStencilOp = Tpetra::RTI::kernelOp<double>( kern, map, map, importer );
00167   // x = ones(), FD(x) = [1 zeros() 1]
00168   auto timeFDStencil = TimeMonitor::getNewTimer("FD RTI Stencil");
00169   {
00170     TimeMonitor lcltimer(*timeFDStencil);
00171     for (int l=0; l != numTimeTrials; ++l) {
00172       FDStencilOp->apply( *x, *y );
00173     }
00174   }
00175   norm = y->norm1();
00176   if (verbose) {
00177     std::cout << std::endl
00178               << "TpetraExamples::FDStencil" << std::endl
00179               << "norm(y) result == " << std::setprecision(2) << std::scientific << norm 
00180               << ", expected value is " << 2.0 << std::endl;
00181   }
00182 
00183   //
00184   // Create a finite-difference stencil using a CrsMatrix
00185   // 
00186   auto FDMatrix = Tpetra::createCrsMatrix<double>(map);  
00187   for (int r=map->getMinGlobalIndex(); r <= map->getMaxGlobalIndex(); ++r) {
00188     if (r == map->getMinAllGlobalIndex()) {
00189       FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r,r+1), Teuchos::tuple<double>(2.0,-1.0));
00190     }
00191     else if (r == map->getMaxAllGlobalIndex()) {
00192       FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r-1,r), Teuchos::tuple<double>(-1.0,2.0));
00193     }
00194     else {
00195       FDMatrix->insertGlobalValues(r, Teuchos::tuple<int>(r-1,r,r+1), Teuchos::tuple<double>(-1.0,2.0,-1.0));
00196     }
00197   }
00198   FDMatrix->fillComplete();
00199   auto timeFDMatrix = TimeMonitor::getNewTimer("FD CrsMatrix");
00200   {
00201     TimeMonitor lcltimer(*timeFDMatrix);
00202     for (int l=0; l != numTimeTrials; ++l) {
00203       FDMatrix->apply(*x, *y);
00204     }
00205   }
00206 
00207   //
00208   // Print timings
00209   //
00210   if (verbose) {
00211     std::cout << std::endl;
00212     TimeMonitor::summarize( std::cout );
00213   }
00214 
00215   if (verbose) {
00216     std::cout << "\nEnd Result: TEST PASSED" << std::endl;
00217   }
00218   return 0;
00219 }
00220 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines