Tpetra Matrix/Vector Services Version of the Day
RTIExample.cpp
Go to the documentation of this file.
00001 /*
00002 // @HEADER
00003 // ***********************************************************************
00004 // 
00005 //          Tpetra: Templated Linear Algebra Services Package
00006 //                 Copyright (2008) Sandia Corporation
00007 // 
00008 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00009 // the U.S. Government retains certain rights in this software.
00010 // 
00011 // Redistribution and use in source and binary forms, with or without
00012 // modification, are permitted provided that the following conditions are
00013 // met:
00014 //
00015 // 1. Redistributions of source code must retain the above copyright
00016 // notice, this list of conditions and the following disclaimer.
00017 //
00018 // 2. Redistributions in binary form must reproduce the above copyright
00019 // notice, this list of conditions and the following disclaimer in the
00020 // documentation and/or other materials provided with the distribution.
00021 //
00022 // 3. Neither the name of the Corporation nor the names of the
00023 // contributors may be used to endorse or promote products derived from
00024 // this software without specific prior written permission.
00025 //
00026 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
00027 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00028 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00029 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
00030 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00031 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00032 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00033 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00034 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00035 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00036 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00037 //
00038 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00039 // 
00040 // ************************************************************************
00041 // @HEADER
00042 */
00043 
00044 #include <Teuchos_GlobalMPISession.hpp>
00045 #include <Teuchos_oblackholestream.hpp>
00046 #include <Teuchos_CommandLineProcessor.hpp>
00047 #include <Teuchos_TimeMonitor.hpp>
00048 
00049 #include <Kokkos_DefaultNode.hpp>
00050 
00051 #include "Tpetra_Version.hpp"
00052 #include "Tpetra_DefaultPlatform.hpp"
00053 #include "Tpetra_MultiVector.hpp"
00054 #include "Tpetra_RTI.hpp"
00055 
00056 #include <iostream>
00057 #include <iomanip>
00058 #include <functional>
00059 #include <utility>
00060 
00065 namespace TpetraExamples {
00066 
00080   template <class Arg1, class Arg2, class MultPrec>
00081   class mprec_mult : public std::binary_function<Arg1,Arg2,MultPrec> {
00082   public:
00083     mprec_mult() {}
00084     inline MultPrec 
00085     operator() (const Arg1& x, const Arg2& y) const {
00086       return (MultPrec)(x) * (MultPrec)(y);
00087     }
00088   };
00089 
00105   template <class T1, class T2, class Op>
00106   class pair_op : 
00107     public std::binary_function<std::pair<T1,T2>, std::pair<T1,T2>, std::pair<T1,T2>> {
00108   private:
00109     Op op_;
00110   public:
00111     pair_op(Op op) : op_(op) {}
00112     inline std::pair<T1,T2> 
00113     operator() (const std::pair<T1,T2>& a, const std::pair<T1,T2>& b) const {
00114       return std::make_pair (op_ (a.first, b.first), op_ (a.second, b.second));
00115     }
00116   };
00117 
00120   template <class T1, class T2, class Op>
00121   pair_op<T1,T2,Op> make_pair_op (Op op) { 
00122     return pair_op<T1,T2,Op>(op); 
00123   }
00124 
00125 }
00126 
00127 namespace Tpetra {
00128   namespace RTI {
00129     //
00130     // Specialization of ZeroOp for a pair of two different types.
00131     //
00132     template <class T1, class T2>
00133     class ZeroOp<std::pair<T1,T2>> {
00134       public:
00135       static inline std::pair<T1,T2> identity() {
00136         return std::make_pair( Teuchos::ScalarTraits<T1>::zero(), 
00137                                Teuchos::ScalarTraits<T2>::zero() );
00138       }
00139     };
00140   }
00141 }
00142 
00143 int main(int argc, char *argv[])
00144 {
00145   //
00146   // Set up MPI, if applicable.
00147   //
00148   Teuchos::oblackholestream blackhole;
00149   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&blackhole);
00150 
00151   using Teuchos::TimeMonitor;
00152   using Tpetra::RTI::ZeroOp;
00153   using Tpetra::RTI::reductionGlob;
00154   using TpetraExamples::mprec_mult;
00155   using std::make_pair;
00156   using std::pair;
00157 
00158   // 
00159   // Get the default communicator and node
00160   //
00161   auto &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
00162   auto comm = platform.getComm();
00163   auto node = platform.getNode();
00164   const int myImageID = comm->getRank();
00165 
00166   //
00167   // Get example parameters from command-line processor
00168   //  
00169   bool verbose = (myImageID==0);
00170   int numGlobal_user = 100*comm->getSize();
00171   int numTimeTrials = 3;
00172   Teuchos::CommandLineProcessor cmdp(false,true);
00173   cmdp.setOption("verbose","quiet",&verbose,"Print messages and results.");
00174   cmdp.setOption("global-size",&numGlobal_user,"Global test size.");
00175   cmdp.setOption("num-time-trials",&numTimeTrials,"Number of trials in timing loops.");
00176   if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
00177     return -1;
00178   }
00179 
00180   // 
00181   // Say hello, print some communicator info
00182   //
00183   if (verbose) {
00184     std::cout << "\n" << Tpetra::version() << std::endl;
00185     std::cout << "Comm info: " << *comm;
00186     std::cout << "Node type: " << Teuchos::typeName(*node) << std::endl;
00187     std::cout << std::endl;
00188   }
00189 
00190 
00191   // 
00192   // Create a simple map with 5 local entries per node
00193   // 
00194   Tpetra::global_size_t numGlobalRows = numGlobal_user;
00195   auto map = Tpetra::createUniformContigMapWithNode<int,int>(numGlobalRows, comm, node);
00196   const size_t numLocalRows = map->getNodeNumElements();
00197   auto x = Tpetra::createVector<float>(map),
00198        y = Tpetra::createVector<float>(map);
00199   auto z = Tpetra::createVector<double>(map),
00200        w = Tpetra::createVector<double>(map),
00201        v = Tpetra::createVector<double>(map);
00202 
00203   //
00204   // Initialization and simple reduction
00205   //
00206 
00207   // sets x[i] = 1.0f
00208   Tpetra::RTI::unary_transform( *x, [](float xi){return 1.0f;} );
00209   // sets y[i] = x[i]
00210   Tpetra::RTI::binary_transform( *y, *x, [](float /*yi*/, float xi) {return xi;} );
00211   // sets y[i] = plus(y[i], x[i]) = y[i] + x[i]
00212   Tpetra::RTI::binary_transform( *y, *x, std::plus<float>() );
00213   // compute dot(x,y) in single precision, sum all x[i]*y[i]
00214   float fresult = Tpetra::RTI::reduce( *x, *y, 
00215                                        reductionGlob<
00216                                          ZeroOp<float>>( 
00217                                          std::multiplies<float>(), 
00218                                          std::plus<float>()
00219                                        ) );
00220   if (verbose) {
00221     std::cout << std::left << "dot( ones, twos ) result == " << fresult 
00222         << ", expected value is " << numGlobalRows*2.0f << std::endl;
00223   }
00224 
00225   //
00226   // Single precision testing
00227   //
00228 
00229   // set x = [1, 1e-4, ..., 1e-4]
00230   {
00231     Teuchos::ArrayRCP<float> xdata = x->get1dViewNonConst();
00232     for (size_t i=1; i < numLocalRows; ++i) {
00233       xdata[i] = 1.0f / 4096.0f;
00234     }
00235   }
00236   // set y[i] = x while computing square of norm2(x) (using y): multiply x[i] * y[i] in float, sum them in float, 0.0f is the additive identity
00237   fresult = Tpetra::RTI::binary_pre_transform_reduce(*y, *x, reductionGlob<ZeroOp<float>>( [](float /*yi*/, float xi){return xi;}, std::multiplies<float>(), std::plus<float>()) );
00238 
00239   // compute pure float inner product alone, timing it for comparison
00240   auto timePF = TimeMonitor::getNewTimer("Pure float dot()");
00241   {
00242     TimeMonitor lcltimer(*timePF);    
00243     for (int l=0; l != numTimeTrials; ++l) 
00244        fresult = Tpetra::RTI::reduce(*x, *y, reductionGlob<ZeroOp<float>>(std::multiplies<float>(), std::plus<float>()) );
00245   }
00246   if (verbose) {
00247     std::cout << std::left << std::endl << std::setw(25) << "pure float result" << " == " << std::setprecision(12) << std::scientific << fresult << std::endl;
00248   }
00249 
00250   //
00251   // Mixed precision testing
00252   // 
00253 
00254   // compute dot(x,y) with double accumulator: multiply x[i] * y[i] in float, sum them in double, 0.0 is the additive identity
00255   auto timeAD = TimeMonitor::getNewTimer("Double acc. dot()");
00256   {
00257     TimeMonitor lcltimer(*timeAD);
00258     for (int l=0; l != numTimeTrials; ++l) 
00259        fresult = Tpetra::RTI::reduce(*x, *y, reductionGlob<ZeroOp<double>>(std::multiplies<float>(), std::plus<double>()) );
00260   }
00261   if (verbose) {
00262     std::cout << std::left << std::setw(25) << "double acc. result" << " == " << std::setprecision(12) << std::scientific << fresult << std::endl;
00263   }
00264 
00265   // compute dot(x,y) in full double precision: multiply x[i] * y[i] in double, sum them in double, 0.0 is the additive identity
00266   double dresult = 0.0;
00267   auto timeMAD = TimeMonitor::getNewTimer("Double mult./acc. dot()");
00268   {
00269     TimeMonitor lcltimer(*timeMAD);
00270     for (int l=0; l != numTimeTrials; ++l) 
00271        dresult = Tpetra::RTI::reduce(*x, *y, reductionGlob< ZeroOp<double>>(
00272                                                             mprec_mult<float,float,double>(), 
00273                                                             std::plus<double>()) );
00274   }
00275   if (verbose) {
00276     std::cout << std::left << std::setw(25) << "double mult/add result" << " == " << std::setprecision(12) << std::scientific << dresult << std::endl;
00277   }
00278 
00279   // compute dot(x,y) in full double precision: multiply x[i] * y[i] in double, sum them in double, 0.0 is the additive identity
00280   auto timePD = TimeMonitor::getNewTimer("Pure double dot()");
00281   // set [w,z] = x
00282   Tpetra::RTI::binary_transform( *w, *x, [](double /*wi*/, float xi) -> double {return xi;} );
00283   Tpetra::RTI::binary_transform( *z, *x, [](double /*zi*/, float xi) -> double {return xi;} );
00284   {
00285     TimeMonitor lcltimer(*timePD);
00286     for (int l=0; l != numTimeTrials; ++l) 
00287        dresult = Tpetra::RTI::reduce(*w, *z, reductionGlob< ZeroOp<double>>(
00288                                                             std::multiplies<double>(), 
00289                                                             std::plus<double>()) );
00290   }
00291   if (verbose) {
00292     std::cout << std::left << std::setw(25) << "pure double result" << " == " << std::setprecision(12) << std::scientific << dresult << std::endl
00293               << std::endl;
00294   }
00295 
00296   //
00297   // Compute z = alpha*x, where z is double precision and x is single precision
00298   //
00299   Tpetra::RTI::binary_transform( *z, *x, [](double /*zi*/, float xi) -> double {return 2.0*xi;} );
00300 
00301   // 
00302   // Two simultaneous dot products: z'*w and z'*v at the same time, saves a pass through z
00303   // 
00304   Tpetra::RTI::unary_transform( *z, [](double xi){return 1.0f;} );
00305   Tpetra::RTI::unary_transform( *w, [](double xi){return 2.0f;} );
00306   Tpetra::RTI::unary_transform( *v, [](double xi){return 3.0f;} );
00307   auto timeTwoDotInd = TimeMonitor::getNewTimer("dot(z,w) and dot(z,v) independently");
00308   {
00309     TimeMonitor lcltimer(*timeTwoDotInd);
00310     for (int l=0; l != numTimeTrials; ++l) 
00311     {
00312        dresult = Tpetra::RTI::reduce(*z, *w, reductionGlob< ZeroOp<double>>(
00313                                                             std::multiplies<double>(), 
00314                                                             std::plus<double>()) );
00315        dresult = Tpetra::RTI::reduce(*z, *v, reductionGlob< ZeroOp<double>>(
00316                                                             std::multiplies<double>(), 
00317                                                             std::plus<double>()) );
00318     }
00319   }
00320   auto timeTwoDotSim = TimeMonitor::getNewTimer("dot(z,w) and dot(z,v) simultaneously");
00321   {
00322     TimeMonitor lcltimer(*timeTwoDotSim);
00323     pair<double,double> tdres;
00324     for (int l=0; l != numTimeTrials; ++l) 
00325     {
00326       tdres = Tpetra::RTI::reduce(*z, *w, *v, 
00327                                   reductionGlob<ZeroOp<pair<double,double>>>(
00328                                                 [](double zi, double wi, double vi) { return make_pair(zi*wi, zi*vi); },
00329                                                 TpetraExamples::make_pair_op<double,double>(std::plus<double>()))
00330                                  );
00331     }
00332   }
00333 
00334   //
00335   // Print timings
00336   //
00337   if (verbose) {
00338     TimeMonitor::summarize( std::cout );
00339   }
00340 
00341   if (verbose) {
00342     std::cout << "\nEnd Result: TEST PASSED" << std::endl;
00343   }
00344   return 0;
00345 }
00346 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines