Sierra Toolkit Version of the Day
Exception.cpp
00001 
00010 #include <stdexcept>
00011 #include <exception>
00012 #include <new>
00013 #include <typeinfo>
00014 #include <ios>
00015 #include <string>
00016 #include <sstream>
00017 #include <iostream>
00018 
00019 #include <assert.h>
00020 
00021 #include <stk_util/diag/Env.hpp>
00022 #include <stk_util/diag/Platform.hpp>
00023 #include <stk_util/parallel/Exception.hpp>
00024 #include <stk_util/parallel/ExceptionReport.hpp>
00025 #include <stk_util/parallel/ExceptionIos.hpp>
00026 #include <stk_util/diag/String.hpp>
00027 #include <stk_util/diag/Trace.hpp>
00028 
00029 #include <stk_util/parallel/mpih.hpp>
00030 
00031 namespace sierra {
00032 
/// Intentionally empty function; it performs no work when called.
///
/// NOTE(review): presumably exists as a stable symbol on which a debugger
/// breakpoint can be set when an exception is about to be thrown -- confirm
/// against the callers.
void sierra_exception_throw()
{
}
00036 
00037 
00038 ParallelThrowRegistry &
00039 ParallelThrowRegistry::instance()
00040 {
00041   static ParallelThrowRegistry s_parallelThrowRegistry;
00042 
00043   return s_parallelThrowRegistry;
00044 }
00045 
00046 
00047 ParallelThrowRegistry::Registry::Registry()
00048 {}
00049 
00050 
00051 ParallelThrowRegistry::Registry::~Registry()
00052 {
00053   // Truely sick.  Each is registered twice, once for the parallel version of the
00054   // exception and once for the <stdexcept> base class version.  The double increment
00055   // keeps from deleting it twice.  See ParallelThrowRegistry::registerException.
00056   for (iterator it = begin(); it != end(); ++it, ++it)
00057     delete (*it).second;
00058 }
00059 
00060 
00061 ExParallel &
00062 ParallelThrowRegistry::register_exception_a(
00063   const std::type_info &  exception_type,
00064   ExParallel *      exception)
00065 {
00066   if (!findException(exception_type)) {
00067     m_registry.push_back(Registry::value_type(&exception_type, exception));
00068     mpih::Add_Handle(*exception);
00069   }
00070   return *exception;
00071 }
00072 
00073 ExParallel *
00074 ParallelThrowRegistry::findException(
00075   const std::type_info &    exception_type)
00076 {
00077   for (Registry::iterator it = m_registry.begin(); it != m_registry.end(); ++it)
00078     if (*(*it).first == exception_type)
00079       return (*it).second;
00080 
00081   return NULL;
00082 }
00083 
00084 
00085 void
00086 ExParallel::parallel_handler()
00087 {}
00088 
00089 
00090 void
00091 throw_copy(
00092   const std::exception &  x,
00093   const std::string &   append_message)
00094 {
00095   ExParallel *exception = ParallelThrowRegistry::instance().findException(typeid(x));
00096   if (!exception)
00097     exception = ParallelThrowRegistry::instance().findException(typeid(Exception));
00098 
00099   exception->clear();
00100   *exception << x.what() << append_message;
00101 
00102   exception->throw_copy();
00103 }
00104 
00105 
00106 void
00107 set_exception()
00108 {
00109   BadException x;
00110   x << "Unknown exception";
00111   set_exception(static_cast<ExParallel &>(x));
00112 }
00113 
00114 
00115 void
00116 set_exception(
00117   std::exception &    x)
00118 {
00119   ExParallel *registered_exception = ParallelThrowRegistry::instance().findException(typeid(x));
00120 
00121   if (!registered_exception)
00122     registered_exception = ParallelThrowRegistry::instance().findException(typeid(Exception));
00123 
00124   registered_exception->setDescription(x.what());
00125   registered_exception->setTraceback(Diag::Traceback::printTraceback(Diag::Traceback::snapshot()));
00126 
00127 //  std::cerr << "Exception " << demangle(typeid(*registered_exception).name()) << " will be thrown from processor " << Env::parallel_rank() << " on the next MPIH function:" << std::endl
00128 //      << registered_exception->getDescription() << std::endl
00129 //      << registered_exception->getTraceback() << std::endl;
00130 
00131   mpih::Set_Local_Handle(const_cast<ExParallel &>(*registered_exception));
00132 }
00133 
00134 
00135 void
00136 set_exception(
00137   ExParallel &      x)
00138 {
00139   ExParallel *registered_exception = ParallelThrowRegistry::instance().findException(typeid(x));
00140 
00141   if (!registered_exception)
00142     registered_exception = ParallelThrowRegistry::instance().findException(typeid(Exception));
00143 
00144   registered_exception->setDescription(x.getDescription());
00145   registered_exception->setTraceback(Diag::Traceback::printTraceback(Diag::Traceback::snapshot()));
00146 
00147 //  std::cerr << "Exception " << demangle(typeid(*registered_exception).name()) << " will be thrown from processor " << Env::parallel_rank() << " on the next MPIH function:" << std::endl
00148 //      << registered_exception->getDescription() << std::endl
00149 //      << registered_exception->getTraceback() << std::endl;
00150 
00151   mpih::Set_Local_Handle(const_cast<ExParallel &>(*registered_exception));
00152 }
00153 
00154 
00155 void
00156 register_stl_parallel_exceptions()
00157 {
00158   mpih::Enable();
00159 
00160   Exception::registerException();
00161   BadAlloc::registerException();
00162   BadCast::registerException();
00163   BadTypeid::registerException();
00164   LogicError::registerException();
00165   DomainError::registerException();
00166   InvalidArgument::registerException();
00167   LengthError::registerException();
00168   OutOfRange::registerException();
00169   RuntimeError::registerException();
00170   RangeError::registerException();
00171   OverflowError::registerException();
00172   UnderflowError::registerException();
00173   BadException::registerException();
00174 
00175   mpih::Activate_Handles();
00176 }
00177 
00178 
00179 void
00180 parallel_throw(
00181   MPI_Comm    mpi_comm)
00182 {
00183   int nprocs;
00184   MPI_Comm_size(mpi_comm, &nprocs);
00185 
00186   ExParallel **handles = new ExParallel* [nprocs];
00187 
00188   mpih::Get_Global_Handles(handles);
00189 
00190   MPIH_Handler_compete handler_compete_fn;
00191   MPIH_Handler_execute handler_execute_fn;
00192   mpih::Get_Functions(&handler_compete_fn ,
00193           &handler_execute_fn);
00194 
00195   /* Now that we have the handles,
00196    * reset the handles so we don't throw again.  This way
00197    * whatever function catches the exception we are about to
00198    * throw can call mpih and mpih will not just throw again.
00199    */
00200   mpih::Reset_Local_Handle();
00201 
00202   /* First iterate through all of the exceptions thrown on all of
00203    * the processors, and if any of them were thrown on this
00204    * processor, print an error message and a traceback.
00205    * only the owning processor will have the traceback information.
00206    */
00207   /* Iterate through all of the exceptions thrown on all of the processors
00208    * and call the parallel_handler() function defined by any derived from
00209    * ExParallel.  This is done across all processors so that
00210    * it is valid to do collective communication inside of parallel_handler()
00211    */
00212   for (int i = 0; i < nprocs; ++i) {
00213     if (handles[i]) {
00214       ExParallel *x = dynamic_cast<ExParallel *>(handles[i]);
00215       if (x)
00216   x->parallel_handler();
00217     }
00218   }
00219 
00220   /* Iterate through all of the exceptions thrown on all of the processors
00221    * and select the one to throw in parallel on all processors.  We would
00222    * like to find one derived from ExParallel.
00223    */
00224 
00225   ExParallel *the_exception = NULL;
00226   int originating_processor = -1;
00227 
00228   for (int i = 0; i < nprocs; ++i) {
00229     if (handles[i]) {
00230       ExParallel *x = dynamic_cast<ExParallel *>(handles[i]);
00231       if (x) {
00232   if (handler_compete_fn)
00233     (handler_compete_fn) (reinterpret_cast<void **>(&handles[i]), the_exception);
00234   if ( handles[i] != the_exception ) {
00235     the_exception = x;
00236     originating_processor = i;
00237   }
00238       }
00239     }
00240   }
00241 
00242   delete [] handles;
00243 
00244   /* Since this function is called in parallel, it is possible
00245    * to perform collective communication.  Here the traceback
00246    * and error messages are broadcast and set on all processors.
00247    * These are the only two fields that are guarenteeded to be
00248    * in each exception class.  Other data stored in specialized
00249    * derived classes will have to be communicated seperately.
00250    * If needed this communication could be added to a virtual
00251    * base class.  That is a future enhancements depending on
00252    * the demand.
00253    */
00254   if (the_exception) {
00255     // Copy the description from the originating process to everywhere.
00256     std::string description(the_exception->getDescriptionStream().str());
00257     int description_len = description.length();
00258     MPI_Bcast(&description_len,
00259         1,
00260         MPI_INT,
00261         originating_processor,
00262         mpi_comm);
00263 
00264     char *description_buf = new char[description_len];
00265     description.copy(description_buf, description_len);
00266 
00267     MPI_Bcast(description_buf,
00268         description_len,
00269         MPI_CHAR,
00270         originating_processor,
00271         mpi_comm);
00272 
00273     // Copy the traceback stack from the originating process to everywhere.
00274     const std::string &traceback(the_exception->getTraceback());
00275     int traceback_len = traceback.length();
00276     MPI_Bcast(&traceback_len,
00277         1,
00278         MPI_INT,
00279         originating_processor,
00280         mpi_comm);
00281 
00282     char *traceback_buf = new char[traceback_len];
00283     traceback.copy(traceback_buf, traceback_len);
00284 
00285     MPI_Bcast(traceback_buf,
00286         traceback_len,
00287         MPI_CHAR,
00288         originating_processor,
00289         mpi_comm);
00290 
00291     // Rebuild the exception from the broadcasted data
00292     the_exception->setDescription(std::string(description_buf, description_len));
00293     the_exception->setTraceback(std::string(traceback_buf, traceback_len));
00294     the_exception->setParallel(originating_processor);
00295 
00296 //     std::cerr << "Throwing exception " << demangle(typeid(*the_exception).name()) << " in parallel" << std::endl
00297 //               << the_exception->getDescription() << std::endl
00298 //               << the_exception->getTraceback() << std::endl;
00299 
00300 #ifdef SIERRA_MPIH_VERBOSE
00301     Env::outputP0()
00302       <<"*************** Exception handling ***************"<<endl
00303       <<" A parallel exception of type "<< typeid(*the_exception).name()<<endl
00304       <<" will be thrown on all processors."<<endl;
00305 #endif
00306 
00307     delete [] traceback_buf;
00308     delete [] description_buf;
00309     the_exception->throw_copy();
00310   }
00311   else {
00312 #ifdef SIERRA_MPIH_VERBOSE
00313     Env::outputP0()
00314       <<"*************** Exception handling ***************"<<endl
00315       <<" A parallel exception of type Unknown_Exception"<<endl
00316       <<" will be thrown on all processors."<<endl;
00317 #endif
00318     throw Exception();
00319   }
00320 }
00321 
00322 } // namespace sierra
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines