test/Polling/cxx_main.cpp

Go to the documentation of this file.
00001 // @HEADER
00002 // ***********************************************************************
00003 // 
00004 //                    Teuchos: Common Tools Package
00005 //                 Copyright (2004) Sandia Corporation
00006 // 
00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
00008 // license for use of this work by or on behalf of the U.S. Government.
00009 // 
00010 // This library is free software; you can redistribute it and/or modify
00011 // it under the terms of the GNU Lesser General Public License as
00012 // published by the Free Software Foundation; either version 2.1 of the
00013 // License, or (at your option) any later version.
00014 //  
00015 // This library is distributed in the hope that it will be useful, but
00016 // WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //  
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00023 // USA
00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 
00025 // 
00026 // ***********************************************************************
00027 // @HEADER
00028 
00029 #include "Teuchos_GlobalMPISession.hpp"
00030 #include "Teuchos_MPIComm.hpp"
00031 #include "Teuchos_ErrorPolling.hpp"
00032 #include "Teuchos_Version.hpp"
00033 
00034 using namespace Teuchos;
00035 using std::string;
00036 
00037 /* \example Test of polling for exceptions on other processors */
00038 
00039 int main( int argc, char* argv[] )
00040 {
00041   /* return value */
00042   int state=0;
00043 
00044   Teuchos::GlobalMPISession mpiSession(&argc, &argv);
00045 
00046   std::cout << Teuchos::Teuchos_Version() << std::endl << std::endl;
00047 
00048   try
00049     {
00050 
00051       MPIComm comm = MPIComm::world();
00052 
00053      
00054       /*----- Demonstrate detection of an off-processor error  -------- */
00055       
00056       try
00057         {
00058           /* Try some code that will fail on one of the processors */
00059           try
00060             {
00061               /* Generate an std::exception on proc 1 */
00062               TEST_FOR_EXCEPTION(comm.getRank()==1, std::runtime_error,
00063                                  "std::exception [expected] detected on proc="
00064                                  << comm.getRank());
00065               /* On all other procs, do some calculation */
00066               double x=0;
00067               for (int i=0; i<100; i++) x += i;
00068 
00069             }
00070           catch(std::exception& ex1)
00071             {
00072               /* If we catch an std::exception, report the failure to the other 
00073                * processors. This call to reportFailure() must be
00074                * paired with a call to pollForFailures() in the 
00075                * branch that did not detect an std::exception.
00076                */
00077               ErrorPolling::reportFailure(comm);
00078               TEUCHOS_TRACE(ex1);
00079             }
00080 
00081           /* 
00082            * Here we poll for the state of other processors. If all processors
00083            * report OK, pollForFailures() will return zero and an
00084            * std::exception will not be thrown. If another
00085            * processor has called reportFailure(), then pollForFailures()
00086            * will return a nonzero number and an std::exception will be thrown.
00087            */
00088           TEST_FOR_EXCEPTION(ErrorPolling::pollForFailures(comm),
00089                              std::runtime_error, 
00090                              "off-processor error [expected] detected "
00091                              "on proc=" << comm.getRank());
00092 
00093 
00094 
00095           /* Do a collective operation. In the present example,
00096            * this code should never be reached
00097            * because all processors should have detected either a local
00098            * std::exception or a remote std::exception. */
00099           std::cerr << "this is bad! Processor=" << comm.getRank() 
00100                << "should not have reached this point" << std::endl;
00101 
00102           /* report the bad news to the testharness 
00103            * using the return value... */
00104           state = 1;
00105 
00106           /* Throw an std::exception. This is not a drill!!! */
00107           TEST_FOR_EXCEPTION(state, std::runtime_error,
00108                              "std::exception [UNEXPECTED!!!] detected in test "
00109                              "of polling on processor=" << comm.getRank());
00110 
00111           /* This collective operation would fail if executed here, because
00112            * one of the processors has thrown an std::exception and never
00113            * reached this point. Good thing we've polled for errors! */
00114           int x=comm.getRank();
00115           int sum;
00116           comm.allReduce( (void*) &x, (void*) &sum, 1, MPIComm::INT,
00117                           MPIComm::SUM);
00118           std::cerr << "sum=" << sum << std::endl;
00119         }
00120       catch(std::exception& ex)
00121         {
00122           std::cerr << ex.what() << std::endl;
00123         }
00124 
00125       std::cerr << "p=" << MPIComm::world().getRank() 
00126            << ": std::exception polling successful" << std::endl;
00127 
00128 
00129       /*-- Demonstrate safe pass-through when no off-proc error happens --- */
00130 
00131       try
00132         {
00133           /* Try some code that will not fail on any processors */
00134           try
00135             {
00136               /* On all procs, do some foolproof calculation */
00137               double x=0;
00138               for (int i=0; i<100; i++) x += i;
00139 
00140             }
00141           catch(std::exception& ex1)
00142             {
00143               /* If we catch an std::exception, report the failure to the other 
00144                * processors. This call to reportFailure() must be
00145                * paired with a call to pollForFailures() in the 
00146                * branch that did not detect an std::exception.
00147                */
00148               ErrorPolling::reportFailure(comm);
00149               TEUCHOS_TRACE(ex1);
00150             }
00151 
00152           /* 
00153            * Here we poll for the state of other processors. If all processors
00154            * report OK, pollForFailures() will return zero and an
00155            * std::exception will not be thrown. If another
00156            * processor has called reportFailure(), then pollForFailures()
00157            * will return a nonzero number and an std::exception will be thrown.
00158            */
00159           TEST_FOR_EXCEPTION(ErrorPolling::pollForFailures(comm),
00160                              std::runtime_error, 
00161                              "off-processor error [UNEXPECTED!!!] detected "
00162                              "on proc=" << comm.getRank());
00163 
00164 
00165 
00166           /* 
00167            * Do a collective operation. In the present example,
00168            * this code will be reached on all processors because
00169            * no std::exception has been thrown by any processor.
00170            */
00171           std::cerr << "Processor=" << comm.getRank() 
00172                << "ready to do collective operation" << std::endl;
00173 
00174           /* 
00175            * This collective operation is safe because we have polled
00176            * all processors and known that everyone is still up and running.
00177            */
00178           int x=comm.getRank();
00179           int sum;
00180           comm.allReduce( (void*) &x, (void*) &sum, 1, MPIComm::INT,
00181                           MPIComm::SUM);
00182           if (comm.getRank()==0) std::cerr << "sum=" << sum << std::endl;
00183         }
00184       catch(std::exception& ex)
00185         {
00186           std::cerr << "std::exception [UNEXPECTED!!!] detected" << std::endl;
00187           std::cerr << ex.what() << std::endl;
00188           state = 1;
00189         }
00190     }
00191   catch(std::exception& e)
00192     {
00193       std::cerr << e.what() << std::endl;
00194       state = 1;
00195     }
00196 
00197   return state;
00198 
00199 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on Tue Oct 20 10:13:59 2009 for Teuchos Package Browser (Single Doxygen Collection) by  doxygen 1.6.1