Tpetra Matrix/Vector Services Version of the Day
RTIExample.cpp
Go to the documentation of this file.
00001 #include <Teuchos_GlobalMPISession.hpp>
00002 #include <Teuchos_oblackholestream.hpp>
00003 #include <Teuchos_CommandLineProcessor.hpp>
00004 #include <Teuchos_TimeMonitor.hpp>
00005 
00006 #include <Kokkos_DefaultNode.hpp>
00007 
00008 #include "Tpetra_Version.hpp"
00009 #include "Tpetra_DefaultPlatform.hpp"
00010 #include "Tpetra_MultiVector.hpp"
00011 #include "Tpetra_RTI.hpp"
00012 
00013 #include <iostream>
00014 #include <iomanip>
00015 #include <functional>
00016 #include <utility>
00017 
00022 namespace TpetraExamples {
00023 
00037   template <class Arg1, class Arg2, class MultPrec>
00038   class mprec_mult : public std::binary_function<Arg1,Arg2,MultPrec> {
00039   public:
00040     mprec_mult() {}
00041     inline MultPrec 
00042     operator() (const Arg1& x, const Arg2& y) const {
00043       return (MultPrec)(x) * (MultPrec)(y);
00044     }
00045   };
00046 
00062   template <class T1, class T2, class Op>
00063   class pair_op : 
00064     public std::binary_function<std::pair<T1,T2>, std::pair<T1,T2>, std::pair<T1,T2>> {
00065   private:
00066     Op op_;
00067   public:
00068     pair_op(Op op) : op_(op) {}
00069     inline std::pair<T1,T2> 
00070     operator() (const std::pair<T1,T2>& a, const std::pair<T1,T2>& b) const {
00071       return std::make_pair (op_ (a.first, b.first), op_ (a.second, b.second));
00072     }
00073   };
00074 
00077   template <class T1, class T2, class Op>
00078   pair_op<T1,T2,Op> make_pair_op (Op op) { 
00079     return pair_op<T1,T2,Op>(op); 
00080   }
00081 
00082 }
00083 
00084 namespace Tpetra {
00085   namespace RTI {
00086     //
00087     // Specialization of ZeroOp for a pair of two different types.
00088     //
00089     template <class T1, class T2>
00090     class ZeroOp<std::pair<T1,T2>> {
00091       public:
00092       static inline std::pair<T1,T2> identity() {
00093         return std::make_pair( Teuchos::ScalarTraits<T1>::zero(), 
00094                                Teuchos::ScalarTraits<T2>::zero() );
00095       }
00096     };
00097   }
00098 }
00099 
00100 int main(int argc, char *argv[])
00101 {
00102   //
00103   // Set up MPI, if applicable.
00104   //
00105   Teuchos::oblackholestream blackhole;
00106   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
00107 
00108   using Teuchos::TimeMonitor;
00109   using Tpetra::RTI::ZeroOp;
00110   using Tpetra::RTI::reductionGlob;
00111   using TpetraExamples::mprec_mult;
00112   using std::make_pair;
00113   using std::pair;
00114 
00115   // 
00116   // Get the default communicator and node
00117   //
00118   auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
00119   auto comm = platform.getComm();
00120   auto node = platform.getNode();
00121   const int myImageID = comm->getRank();
00122 
00123   //
00124   // Get example parameters from command-line processor
00125   //  
00126   bool verbose = (myImageID==0);
00127   int numGlobal_user = 100*comm->getSize();
00128   int numTimeTrials = 3;
00129   Teuchos::CommandLineProcessor cmdp(false,true);
00130   cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
00131   cmdp.setOption("global-size",&numGlobal_user,"Global test size.");
00132   cmdp.setOption("num-time-trials",&numTimeTrials,"Number of trials in timing loops.");
00133   if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
00134     return -1;
00135   }
00136 
00137   // 
00138   // Say hello, print some communicator info
00139   //
00140   if (verbose) {
00141     std::cout << "\n" << Tpetra::version() << std::endl;
00142     std::cout << "Comm info: " << *comm;
00143     std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
00144     std::cout << std::endl;
00145   }
00146 
00147 
00148   // 
00149   // Create a simple map with 5 local entries per node
00150   // 
00151   Tpetra::global_size_t numGlobalRows = numGlobal_user;
00152   auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
00153   const size_t numLocalRows = map->getNodeNumElements();
00154   auto x = Tpetra::createVector<float>(map),
00155        y = Tpetra::createVector<float>(map);
00156   auto z = Tpetra::createVector<double>(map),
00157        w = Tpetra::createVector<double>(map),
00158        v = Tpetra::createVector<double>(map);
00159 
00160   //
00161   // Initialization and simple reduction
00162   //
00163 
00164   // sets x[i] = 1.0f
00165   Tpetra::RTI::unary_transform( *x, [](float xi){return 1.0f;} );
00166   // sets y[i] = x[i]
00167   Tpetra::RTI::binary_transform( *y, *x, [](float /*yi*/, float xi) {return xi;} );
00168   // sets y[i] = plus(y[i], x[i]) = y[i] + x[i]
00169   Tpetra::RTI::binary_transform( *y, *x, std::plus<float>() );
00170   // compute dot(x,y) in single precision, sum all x[i]*y[i]
00171   float fresult = Tpetra::RTI::reduce( *x, *y, 
00172                                        reductionGlob<
00173                                          ZeroOp<float>>( 
00174                                          std::multiplies<float>(), 
00175                                          std::plus<float>()
00176                                        ) );
00177   if (verbose) {
00178     std::cout << std::left << "dot( ones, twos ) result == " << fresult 
00179         << ", expected value is " << numGlobalRows*2.0f << std::endl;
00180   }
00181 
00182   //
00183   // Single precision testing
00184   //
00185 
00186   // set x = [1, 1e-4, ..., 1e-4]
00187   {
00188     Teuchos::ArrayRCP<float> xdata = x->get1dViewNonConst();
00189     for (size_t i=1; i < numLocalRows; ++i) {
00190       xdata[i] = 1.0f / 4096.0f;
00191     }
00192   }
00193   // set y[i] = x while computing square of norm2(x) (using y): multiply x[i] * y[i] in float, sum them in float, 0.0f is the additive identity
00194   fresult = Tpetra::RTI::binary_pre_transform_reduce(*y, *x, reductionGlob<ZeroOp<float>>( [](float /*yi*/, float xi){return xi;}, std::multiplies<float>(), std::plus<float>()) );
00195 
00196   // compute pure float inner product alone, timing it for comparison
00197   auto timePF = TimeMonitor::getNewTimer("Pure float dot()");
00198   {
00199     TimeMonitor lcltimer(*timePF);    
00200     for (int l=0; l != numTimeTrials; ++l) 
00201        fresult = Tpetra::RTI::reduce(*x, *y, reductionGlob<ZeroOp<float>>(std::multiplies<float>(), std::plus<float>()) );
00202   }
00203   if (verbose) {
00204     std::cout << std::left << std::endl << std::setw(25) << "pure float result" << " == " << std::setprecision(12) << std::scientific << fresult << std::endl;
00205   }
00206 
00207   //
00208   // Mixed precision testing
00209   // 
00210 
00211   // compute dot(x,y) with double accumulator: multiply x[i] * y[i] in float, sum them in double, 0.0 is the additive identity
00212   auto timeAD = TimeMonitor::getNewTimer("Double acc. dot()");
00213   {
00214     TimeMonitor lcltimer(*timeAD);
00215     for (int l=0; l != numTimeTrials; ++l) 
00216        fresult = Tpetra::RTI::reduce(*x, *y, reductionGlob<ZeroOp<double>>(std::multiplies<float>(), std::plus<double>()) );
00217   }
00218   if (verbose) {
00219     std::cout << std::left << std::setw(25) << "double acc. result" << " == " << std::setprecision(12) << std::scientific << fresult << std::endl;
00220   }
00221 
00222   // compute dot(x,y) in full double precision: multiply x[i] * y[i] in double, sum them in double, 0.0 is the additive identity
00223   double dresult = 0.0;
00224   auto timeMAD = TimeMonitor::getNewTimer("Double mult./acc. dot()");
00225   {
00226     TimeMonitor lcltimer(*timeMAD);
00227     for (int l=0; l != numTimeTrials; ++l) 
00228        dresult = Tpetra::RTI::reduce(*x, *y, reductionGlob< ZeroOp<double>>(
00229                                                             mprec_mult<float,float,double>(), 
00230                                                             std::plus<double>()) );
00231   }
00232   if (verbose) {
00233     std::cout << std::left << std::setw(25) << "double mult/add result" << " == " << std::setprecision(12) << std::scientific << dresult << std::endl;
00234   }
00235 
00236   // compute dot(x,y) in full double precision: multiply x[i] * y[i] in double, sum them in double, 0.0 is the additive identity
00237   auto timePD = TimeMonitor::getNewTimer("Pure double dot()");
00238   // set [w,z] = x
00239   Tpetra::RTI::binary_transform( *w, *x, [](double /*wi*/, float xi) -> double {return xi;} );
00240   Tpetra::RTI::binary_transform( *z, *x, [](double /*zi*/, float xi) -> double {return xi;} );
00241   {
00242     TimeMonitor lcltimer(*timePD);
00243     for (int l=0; l != numTimeTrials; ++l) 
00244        dresult = Tpetra::RTI::reduce(*w, *z, reductionGlob< ZeroOp<double>>(
00245                                                             std::multiplies<double>(), 
00246                                                             std::plus<double>()) );
00247   }
00248   if (verbose) {
00249     std::cout << std::left << std::setw(25) << "pure double result" << " == " << std::setprecision(12) << std::scientific << dresult << std::endl
00250               << std::endl;
00251   }
00252 
00253   //
00254   // Compute z = alpha*x, where z is double precision and x is single precision
00255   //
00256   Tpetra::RTI::binary_transform( *z, *x, [](double /*zi*/, float xi) -> double {return 2.0*xi;} );
00257 
00258   // 
00259   // Two simultaneous dot products: z'*w and z'*v at the same time, saves a pass through z
00260   // 
00261   Tpetra::RTI::unary_transform( *z, [](double xi){return 1.0f;} );
00262   Tpetra::RTI::unary_transform( *w, [](double xi){return 2.0f;} );
00263   Tpetra::RTI::unary_transform( *v, [](double xi){return 3.0f;} );
00264   auto timeTwoDotInd = TimeMonitor::getNewTimer("dot(z,w) and dot(z,v) independently");
00265   {
00266     TimeMonitor lcltimer(*timeTwoDotInd);
00267     for (int l=0; l != numTimeTrials; ++l) 
00268     {
00269        dresult = Tpetra::RTI::reduce(*z, *w, reductionGlob< ZeroOp<double>>(
00270                                                             std::multiplies<double>(), 
00271                                                             std::plus<double>()) );
00272        dresult = Tpetra::RTI::reduce(*z, *v, reductionGlob< ZeroOp<double>>(
00273                                                             std::multiplies<double>(), 
00274                                                             std::plus<double>()) );
00275     }
00276   }
00277   auto timeTwoDotSim = TimeMonitor::getNewTimer("dot(z,w) and dot(z,v) simultaneously");
00278   {
00279     TimeMonitor lcltimer(*timeTwoDotSim);
00280     pair<double,double> tdres;
00281     for (int l=0; l != numTimeTrials; ++l) 
00282     {
00283       tdres = Tpetra::RTI::reduce(*z, *w, *v, 
00284                                   reductionGlob<ZeroOp<pair<double,double>>>(
00285                                                 [](double zi, double wi, double vi) { return make_pair(zi*wi, zi*vi); },
00286                                                 TpetraExamples::make_pair_op<double,double>(std::plus<double>()))
00287                                  );
00288     }
00289   }
00290 
00291   //
00292   // Print timings
00293   //
00294   if (verbose) {
00295     TimeMonitor::summarize( std::cout );
00296   }
00297 
00298   if (verbose) {
00299     std::cout << "\nEnd Result: TEST PASSED" << std::endl;
00300   }
00301   return 0;
00302 }
00303 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines