Sierra Toolkit Version of the Day
Env.cpp
00001 
00010 #include <pwd.h>
00011 #include <unistd.h>
00012 
00013 #include <ostream>
00014 #include <fstream>
00015 #include <sstream>
00016 #include <string>
00017 #include <cstring>
00018 #include <cstdlib>
00019 #include <stdexcept>
00020 #include <numeric>
00021 #include <iomanip>
00022 #include <algorithm>
00023 #include <locale>
00024 #include <map>
00025 
00026 #include <stk_util/util/Null_Streambuf.hpp>
00027 #include <stk_util/parallel/mpi_filebuf.hpp>
00028 
00029 #include <stk_util/diag/Timer.hpp>
00030 #include <stk_util/diag/Writer.hpp>
00031 #include <stk_util/diag/WriterRegistry.hpp>
00032 #include <stk_util/diag/Env.hpp>
00033 #include <stk_util/diag/Platform.hpp>
00034 #include <stk_util/diag/Signal.hpp>
00035 #include <stk_util/parallel/Exception.hpp>
00036 #include <stk_util/parallel/ExceptionReport.hpp>
00037 #include <stk_util/parallel/MPI.hpp>
00038 #include <stk_util/environment/OutputLog.hpp>
00039 #include <stk_util/environment/ProductRegistry.hpp>
00040 #include <stk_util/diag/StringUtil.hpp>
00041 #include <stk_util/diag/UserPlugin.hpp>
00042 #include <stk_util/parallel/mpih.hpp>
00043 #include <stk_util/diag/PreParse.hpp>
00044 
00045 #include <stk_util/environment/OutputLog.hpp>
00046 #include <stk_util/environment/ProgramOptions.hpp>
00047 #include <stk_util/environment/RuntimeMessage.hpp>
00048 #include <stk_util/parallel/BroadcastArg.hpp>
00049 #include <stk_util/parallel/ParallelReduce.hpp>
00050 #include <stk_util/util/Bootstrap.hpp>
00051 #include <stk_util/util/IndentStreambuf.hpp>
00052 
00053 namespace sierra {
00054 namespace Env {
00055 
00056 namespace {
00057 
00058 void bootstrap()
00059 {
00060   // Add my command line options to the option descriptions.
00061   boost::program_options::options_description desc("Runtime environment", 120);
00062   desc.add_options()
00063     ("help,h", "Display command line options")
00064     ("directory,d", boost::program_options::value<std::string>()->default_value("./"), "Set working directory")
00065     ("output-log,o", boost::program_options::value<std::string>()->default_value(""), "Output log file path, one of : 'cout', 'cerr', or a file path")
00066     ("logfile,l", boost::program_options::value<std::string>()->default_value(""), "Output log file path, one of : 'cout', 'cerr', or a file path")
00067     ("pout", boost::program_options::value<std::string>()->implicit_value("-"), "Per-processor log file path")
00068     ("dout", boost::program_options::value<std::string>()->implicit_value("out"), "Diagnostic output stream one of: 'cout', 'cerr', 'out' or a file path")
00069 //    ("timer", boost::program_options::value<std::string>(), "Wall and CPU time options") // , &Diag::Timer::theTimerParser())
00070     ("version", "Display version information")
00071     ("jamsub", boost::program_options::value<std::string>(), "Display user subroutine build command")
00072     ("runtest", boost::program_options::value<std::string>()->implicit_value("pid"), "Record process host and pid to this file")
00073     ("developer-mode", "Activate developer specific features")
00074     ("architecture", boost::program_options::value<std::string>(), "Specifies the architecture running the sierra application");
00075 
00076   stk::get_options_description().add(desc);
00077 }
00078 
00079 stk::Bootstrap x(&bootstrap);
00080 
00081 struct EnvData
00082 {
00083   typedef std::map<ExecType, ExecInfo>    ExecMap;
00084 
00085   static EnvData &instance() {
00086     static EnvData s_env;
00087 
00088     return s_env;
00089   }
00090 
00091   EnvData()
00092     : m_productName("not specified"),
00093       m_vm(stk::get_variables_map()),
00094       m_nullBuf(),
00095       m_outputNull(&m_nullBuf),
00096       m_outputP0(&std::cout),
00097       m_output(),
00098       m_startTime((double) ::time(NULL)),
00099       m_executablePath(),
00100       m_shutdownRequested(false),
00101       m_inputFileRequired(true),
00102       m_checkSubCycle(false),
00103       m_worldComm(MPI_COMM_NULL),
00104       m_parallelComm(MPI_COMM_NULL),
00105       m_parallelSize(-1),
00106       m_parallelRank(-1),
00107       m_emptyString(),
00108       m_onString(PARAM_ON),
00109       m_inputFile("")
00110   {
00111     m_execMap[EXEC_TYPE_LAG].m_master      = -1;
00112     m_execMap[EXEC_TYPE_LAG].m_groupComm   = MPI_COMM_NULL;
00113     m_execMap[EXEC_TYPE_FLUID].m_master    = -1;
00114     m_execMap[EXEC_TYPE_FLUID].m_groupComm = MPI_COMM_NULL;
00115     stk::register_log_ostream(std::cout, "cout");
00116     stk::register_log_ostream(std::cerr, "cerr");
00117     
00118     stk::register_ostream(sierra::out(), "out");
00119     stk::register_ostream(sierra::pout(), "pout");
00120     stk::register_ostream(sierra::dout(), "dout");
00121     stk::register_ostream(sierra::tout(), "tout");
00122     
00123     static_cast<stk::indent_streambuf *>(sierra::dwout().rdbuf())->redirect(sierra::dout().rdbuf());
00124   }
00125 
00126   ~EnvData()
00127   {
00128     static_cast<stk::indent_streambuf *>(sierra::dwout().rdbuf())->redirect(std::cout.rdbuf());
00129   
00130     stk::unregister_ostream(tout());
00131     stk::unregister_ostream(dout());
00132     stk::unregister_ostream(pout());
00133     stk::unregister_ostream(out());
00134 
00135     stk::unregister_log_ostream(std::cerr);
00136     stk::unregister_log_ostream(std::cout);
00137   }
00138   
00139   std::string           m_productName;
00140 
00141   boost::program_options::variables_map & m_vm;
00142   
00143   null_streambuf  m_nullBuf;
00144   std::ostream    m_outputNull;
00145   std::ostream *  m_outputP0;
00146   std::ostringstream  m_output;
00147 
00148   double    m_startTime;
00149   std::string   m_executablePath;
00150 
00151   bool      m_shutdownRequested;
00152   bool                  m_inputFileRequired;
00153   bool                  m_checkSubCycle;
00154 
00155   MPI_Comm    m_worldComm;
00156   
00157   MPI_Comm    m_parallelComm;
00158   int     m_parallelSize;
00159   int     m_parallelRank;
00160 
00161   ExecMap               m_execMap;
00162   
00163   const std::string m_emptyString;
00164   const std::string m_onString;
00165 
00166   std::string           m_inputFile;
00167 };
00168 
00169 } // namespace <unnamed>
00170 
00171 const std::string &
00172 product_name()
00173 {
00174   return EnvData::instance().m_productName;
00175 }
00176 
00177 
00178 const std::string &
00179 executable_file()
00180 {
00181   return EnvData::instance().m_executablePath;
00182 }
00183 
00184 
00185 const std::string &
00186 executable_date()
00187 {
00188   static std::string executable_date;
00189 
00190   if (executable_date.empty()) 
00191     executable_date = ProductRegistry::instance().getProductAttribute(EnvData::instance().m_productName, ProductRegistry::BUILD_TIME);
00192 
00193   return executable_date;
00194 }
00195 
00196 
00197 const std::string &
00198 startup_date()
00199 {
00200   static std::string startup_date;
00201 
00202   if (startup_date.empty())
00203     startup_date = format_time(EnvData::instance().m_startTime).c_str();
00204 
00205   return startup_date;
00206 }
00207 
00208 
00209 double
00210 start_time()
00211 {
00212   return EnvData::instance().m_startTime;
00213 }
00214 
00215 
00216 bool
00217 developer_mode()
00218 {
00219   return !get_param("developer-mode").empty();
00220 }
00221 
00222 
00223 void setInputFileName(std::string name) {
00224   EnvData::instance().m_inputFile = name;
00225 }
00226 
00227 std::string getInputFileName() {
00228   return EnvData::instance().m_inputFile;
00229 }
00230 
00231 void set_input_file_required(bool value)
00232 {
00233     EnvData::instance().m_inputFileRequired = value;
00234 }
00235 
00236 void set_check_subcycle(bool value)
00237 {
00238     EnvData::instance().m_checkSubCycle = value;
00239 }
00240 
00241 
00242 const std::string &
00243 architecture()
00244 {
00245   return get_param("architecture");
00246 }
00247 
00248 
00249 const std::string
00250 working_directory() {
00251   char cwd[PATH_MAX];
00252   std::string directory = get_param("directory");
00253   if (directory[0] != '/' && getcwd(cwd, PATH_MAX) != NULL) {
00254     directory = cwd;
00255     directory += '/';
00256   }
00257   return directory;
00258 }
00259 
00260 
00261 std::ostream &
00262 output()
00263 {
00264   return EnvData::instance().m_output;
00265 }
00266 
00267 
00268 std::ostream &
00269 outputP0()
00270 {
00271   return *EnvData::instance().m_outputP0;
00272 }
00273 
00274 
00275 std::ostream &
00276 outputNull() {
00277   return EnvData::instance().m_outputNull;
00278 }
00279 
00280 
// Returns the ruler line used to separate sections of output.
const char *
section_separator()
{
  static const char s_ruler[] = "+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----";

  return s_ruler;
}
00288 
00289 
// Returns the dashed line used to separate subsections of output.
const char *
subsection_separator()
{
  static const char s_dashes[] = "---------------------------------------------------";

  return s_dashes;
}
00297 
00298 
00299 std::string
00300 section_title(
00301   const std::string & title)
00302 {
00303   static size_t s_sectionSeparatorLength = std::strlen(section_separator());
00304 
00305   std::ostringstream strout;
00306 
00307   strout << std::left << std::setw(s_sectionSeparatorLength - 20) << title << std::right << std::setw(20) << format_time(Env::wall_now());
00308   return strout.str();
00309 }
00310 
00311 
00312 int parallel_size() {
00313   return EnvData::instance().m_parallelSize;
00314 }
00315 
00316 int parallel_rank() {
00317   return EnvData::instance().m_parallelRank;
00318 }
00319 
00320 MPI_Comm
00321 parallel_comm()
00322 {
00323   return EnvData::instance().m_parallelComm;
00324 }
00325 
00326 MPI_Comm
00327 parallel_world_comm()
00328 {
00329   return EnvData::instance().m_worldComm;
00330 }
00331 
00332 int parallel_lag_master() {
00333   return EnvData::instance().m_execMap[EXEC_TYPE_LAG].m_master;
00334 }
00335 
00336 int parallel_fluid_master() {
00337   return EnvData::instance().m_execMap[EXEC_TYPE_FLUID].m_master;
00338 }
00339 
00340 int peer_group() {
00341   return EnvData::instance().m_execMap[EXEC_TYPE_PEER].m_master;
00342 }
00343 
// Resolves the path of an executable from the name it was invoked with.
//
//   program  Name as given in argv[0]; may be NULL.
//
// Returns:
//   - an empty string when program is NULL;
//   - program unchanged when it is already absolute;
//   - the realpath() of program when it contains a '/', or program unchanged
//     when realpath() fails;
//   - otherwise the first "<dir>/<program>" over the PATH entries for which
//     access(X_OK) succeeds;
//   - program unchanged when nothing is found.
//
// Candidate paths are assembled in a std::string, so PATH entries or program
// names of any length are handled without overrunning a fixed buffer.
std::string
get_program_path(const char *program)
{
  if (program == NULL)
    return std::string();

  // If we already have the full path, just return it
  if (program[0] == '/')
    return program;

  if (std::strchr(program, '/') != NULL) {
    char resolved[PATH_MAX];

    // The content of the buffer is unspecified when realpath() fails, so only
    // use it on success.
    if (realpath(program, resolved) != NULL)
      return resolved;
    return program;
  }

  const char *search_path = std::getenv("PATH");
  while (search_path && *search_path) {
    // Get the character past the end of the next directory in PATH, i.e.
    // either the ':' or the '\0'
    const char *end = std::strchr(search_path, ':');
    if (!end)
      end = search_path + std::strlen(search_path);

    // Set candidate = directory + '/' + program.  A zero-length PATH entry
    // denotes the current directory.
    std::string candidate(search_path, end);
    if (candidate.empty())
      candidate = ".";
    candidate += '/';
    candidate += program;

    // Check whether the candidate exists and is executable
    if (access(candidate.c_str(), X_OK) == 0)
      return candidate;

    // Advance to the next directory
    search_path = *end ? end + 1 : end;
  }

  // Not found; this shouldn't happen, but maybe the executable got deleted
  // after it was invoked before we got here -- or we have some crazy
  // parallel machine where the executable is inaccessible on the compute
  // nodes despite it somehow having been loaded.  No big deal, just return
  // the non-absolute path.
  return program;
}
00386 
// Forward declarations of helpers defined later in this file and used by StartupSierra().

// Broadcasts and parses the command line options; aborts on comm if that fails.
void parse_options(MPI_Comm comm, int *argc, char ***argv);
// Multiple-executable startup, called when the executable type is not EXEC_TYPE_WORLD; peer_sizes can be NULL.
void startup_multi_exec(MPI_Comm world_comm, ExecType my_executable_type, const std::vector<int> *peer_sizes);
00389 
00390 
00391 
00392 bool StartupSierra(int *        argc,
00393   char ***      argv,
00394   const char *      product_name,
00395   const char *      build_time,
00396   ExecType                mpi_key,
00397   const std::vector<int> *peer_sizes) {
00398   bool returnValue = false;
00399  
00400   stk::Bootstrap::bootstrap();
00401   
00402   EnvData &env_data = EnvData::instance();
00403 
00404   env_data.m_executablePath = get_program_path(*argv[0]);
00405   env_data.m_productName = product_name;
00406 
00407   ProductRegistry::instance().setProductName(product_name);
00408 
00409   ProductRegistry::AttributeMap &product_attributes = ProductRegistry::instance().getProductAttributeMap(product_name);
00410   product_attributes[ProductRegistry::BUILD_TIME] = build_time;
00411   product_attributes[ProductRegistry::EXECUTABLE] = env_data.m_executablePath;
00412 
00413   // Add Utility runtime library to the product registry
00414   sierra::register_product();
00415 
00416   // Add mpih to the product registry
00417   sierra::mpih::register_product();
00418 
00419   // Add operating system information to the product registry.
00420   ProductRegistry::AttributeMap &attr_map = ProductRegistry::instance().addProduct(osname().c_str());
00421   attr_map[ProductRegistry::VERSION]      = osversion().c_str();
00422 
00423   // Process the broadcast command line arguments
00424   namespace opt = boost::program_options;
00425     
00426   opt::variables_map &vm = stk::get_variables_map();
00427   opt::options_description &od = stk::get_options_description();
00428   {
00429     boost::program_options::options_description desc("Diagnostic writers", 120);
00430     
00431     for (Diag::WriterRegistry::iterator it = Diag::getWriterRegistry().begin(); it != Diag::getWriterRegistry().end(); ++it) {
00432       std::ostringstream str;
00433       str << "Diagnostic writer " << (*it).first << std::endl;
00434       (*it).second.second->describe(str);  
00435       desc.add_options()((*it).first.c_str(), boost::program_options::value<std::string>(), str.str().c_str());
00436     }
00437     
00438     std::ostringstream str;
00439     str << "Wall and CPU time options" << std::endl;
00440     Diag::theTimerParser().describe(str);  
00441     desc.add_options()("timer", boost::program_options::value<std::string>(), str.str().c_str());
00442     
00443     od.add(desc);
00444   }
00445 
00446   for (int i = 0; i < *argc; ++i) {
00447     const std::string s((*argv)[i]);
00448     if (s == "-h" || s == "-help" || s == "--help") {
00449       std::cout << std::endl
00450                 << "Sierra Usage: sierra " << lower(product_name) << " [sierra-options...] -O \"[" << lower(product_name) << "-options...]\"" << std::endl << std::endl
00451 //                << "Usage: (MPI run) " << env_data.m_executablePath << " [options...]" << std::endl
00452                 << "For example:" << std::endl
00453                 << "" << std::endl
00454                 << "  sierra " << lower(product_name) << " -i input_deck.i -o sierra.log" << std::endl
00455                 << "    This creates the normal output file sierra.log" << std::endl
00456                 << "" << std::endl
00457                 << "  sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--pout=pp.log\"" << std::endl
00458                 << "    The per-processor output is written to pp.log.n.r for each rank, r, of n processors." << std::endl
00459                 << "" << std::endl
00460                 << "  sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--fmwkout=field,parameters\"" << std::endl
00461                 << "    Enable the framework field and parameter diagnostics" << std::endl
00462                 << "" << std::endl
00463                 << "  sierra " << lower(product_name) << " -i input_deck.i -o sierra.log -O \"--timer=all\"" << std::endl
00464                 << "    Enable the all timers" << std::endl
00465                 << std::endl
00466                 << "  For additional information see:" << std::endl
00467                 << "      http://sierra-dev.sandia.gov/stk/group__stk__util__output__log__detail.html#stk_util_output_log_howto_use_in_sierra_app" << std::endl << std::endl
00468                 << product_name << " options are:" << std::endl
00469                 << stk::get_options_description() << std::endl;
00470       std::exit(0);
00471     }
00472   }
00473 
00474   for (int i = 0; i < *argc; ++i) {
00475     const std::string s((*argv)[i]);
00476     if (s == "-jamsub" || s == "--jamsub") {
00477       const char *t = (*argv)[i + 1];
00478       const char **symbol = sierra::Plugin::Registry::getsym<const char **>(t);
00479       if (symbol) {
00480         std::cout << *symbol << std::endl;
00481         std::exit(0);
00482       }
00483       else
00484         std::exit(1);
00485     }
00486   }
00487 
00488   try {
00489     startup_preparallel_platform();
00490 
00491     // Communicator has not been set, initialize MPI if not already initialized
00492     int mpi_init_val = 0 ;
00493     if ( MPI_SUCCESS != MPI_Initialized( &mpi_init_val ) ) {
00494       throw RuntimeError() << "MPI_Initialized failed";
00495     }
00496 
00497     // Default startup communicator
00498     MPI_Comm startup_mpi_comm = MPI_COMM_WORLD;
00499 
00500     // If we are initializing the comm, see if there are differing
00501     // executables running.  If there are, find our partition and the
00502     // leads of the other partitions.
00503     if ( mpi_init_val == 0 ) {
00504       if ( MPI_SUCCESS != MPI_Init( argc , argv ) ) {
00505   throw RuntimeError() << "MPI_Init failed";
00506       }
00507 
00508       returnValue = true ;
00509 
00510       if (mpi_key != EXEC_TYPE_WORLD) startup_multi_exec(startup_mpi_comm, mpi_key, peer_sizes);
00511     }
00512     
00513     // Ready to reset the environment from NULL, we are the Lagrangian application at this point.
00514     MPI_Comm new_comm = mpi_key != EXEC_TYPE_WORLD ? env_data.m_execMap[mpi_key].m_groupComm : MPI_COMM_WORLD;
00515     reset(new_comm);
00516   }
00517   catch (const std::exception &x) {
00518     std::cerr << "SIERRA execution failed during mpi initialization with the following exception:" << std::endl
00519         << x.what() << std::endl;
00520     MPI_Abort(env_data.m_parallelComm , MPI_ERR_OTHER);
00521   }
00522   catch (...) {
00523     std::cerr << "SIERRA execution failed during mpi initialization  with unknown exception:" << std::endl;
00524 
00525     MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER);
00526   }
00527 
00528   parse_options(env_data.m_parallelComm, argc, argv);
00529   
00530   {
00531     std::ostringstream output_description;
00532 
00533     // On processor 0:
00534     //   [outfile=path] [poutfile=path.n.r] [doutfile=path.n.r] out>{-|cout|cerr|outfile}+pout pout>{null|poutfile} dout>{out|doutfile}
00535 
00536     // On processor 1..n:
00537     //   [poutfile=path.n.r] [doutfile=path.n.r] out>pout pout>{null|poutfile} dout>{out|doutfile}
00538     
00539     std::string out_path1 = vm["output-log"].as<std::string>();
00540     std::string out_path2 = vm["logfile"].as<std::string>();
00541 
00542 
00543     
00544     std::string originalFileName = Env::get_param("input-deck");
00545     std::string modifiedFileName = originalFileName;
00546 
00547     if(originalFileName == "") {
00548       //
00549       //  If no input file specified, error out (unless just running the --version or --help option)
00550       //         
00551       if ( get_param("version").empty() && get_param("help").empty() ) {
00552         if (env_data.m_inputFileRequired) {
00553           throw RuntimeError() << "No input file specified.  An input file must be specified with the '-i' option";
00554         } else {
00555           std::cerr << "WARNING: No input file specified.  An input file should be specified with the '-i' option!" << std::endl;
00556         }
00557       }
00558     } else if ( env_data.m_checkSubCycle ) {
00559       // Alter input-deck if subcycle present
00560       bool debugSubCycleSplit = false;
00561       std::string subCycleRegexp("^\\s*subcycle\\s+blocks\\s*=");
00562       bool subCycleSet = CaseInSensitiveRegexInFile(subCycleRegexp, originalFileName, debugSubCycleSplit);
00563       std::string coarseRegionRegexp("^\\s*begin\\s+presto\\s+region\\s+\\w+_AutoCoarseRegion\\>");
00564       bool coarseRegionMade = CaseInSensitiveRegexInFile( coarseRegionRegexp, originalFileName, debugSubCycleSplit);
00565       std::string fineRegionRegexp("^\\s*begin\\s+presto\\s+region\\s+\\w+_AutoFineRegion\\>");
00566       bool fineRegionMade = CaseInSensitiveRegexInFile( fineRegionRegexp, originalFileName, debugSubCycleSplit);
00567       if ( subCycleSet ) {
00568         if ( !coarseRegionMade && !fineRegionMade ) {
00569           modifiedFileName = CreateSubCycleInputFile( originalFileName );
00570         } else {
00571           if(Env::parallel_rank() == 0) {
00572       std::cout<<"Input File: " << originalFileName << " Appears to have already been converted for subcycling.  ";
00573       std::cout<<"Skipping input conversion " << std::endl;
00574     }
00575         }
00576       }
00577     }
00578 
00579     setInputFileName(modifiedFileName);
00580 
00581 
00582     std::string trueOut;
00583     if(out_path2 != "") {
00584       trueOut = out_path2;
00585     } else if(out_path1 != "") {
00586       //
00587       //  Old syntax compatibility, access the old output-file executable option if the logfile is not defined
00588       //
00589       trueOut = out_path1;
00590     } else {
00591      //
00592       //  If log file name is unspecified, default it to (Base Input File Name).log
00593       //  Use the following logic:
00594       //   If the input file has an extension, replace the last ".extension" with ".log"
00595       //   If the input file has no extension, append ".log" to the input file name
00596       //   If the input file contains the word '.aprepro', assume aprepro was used to convert and strip out the aprepro
00597       //   If the input file contains any directory movement (like ../) strip them out so log file is written to current director
00598       //
00599 
00600       int dotPos = originalFileName.rfind(".");
00601 
00602       if(dotPos == -1) {  //No extension
00603         trueOut = originalFileName + ".log";
00604       } else {  //Extension found
00605         trueOut = originalFileName.substr(0, dotPos) + ".log";
00606       }
00607       //
00608       //  If the output path contains a ".aprepro" tag get rid of it
00609       //
00610       int apreproPos = trueOut.rfind(".aprepro"); 
00611       if(apreproPos != -1) {
00612         trueOut.erase(apreproPos, 8);
00613       }
00614       //
00615       //  If the output path contains a "aaa/input.i" pull off the initial directory redirects so that the log file is written int the current directory
00616       //
00617       int lastSlashPos = trueOut.rfind("/");
00618 
00619       if(lastSlashPos != -1) {
00620         trueOut.erase(0,lastSlashPos+1);
00621       }
00622 
00623 
00624     }
00625  
00626     std::string out_path = trueOut;
00627 
00628     if (out_path == "-")
00629       out_path = "cout";
00630     
00631     std::string out_ostream;
00632 
00633     if (!stk::get_log_ostream(out_path))
00634       if (out_path.size() && out_path[0] != '/')
00635         out_path = working_directory() + out_path;
00636 
00637     if (parallel_rank() == 0) {
00638       if (!stk::get_log_ostream(out_path)) {
00639         output_description << "outfile=\"" << out_path << "\"";
00640         out_ostream = "outfile";
00641       }
00642       else {
00643         out_ostream = out_path;
00644       }
00645     }
00646     else
00647       out_ostream = "null";
00648 
00649     std::string pout_ostream = "null";
00650     if (vm.count("pout")) {
00651       std::string pout_path = vm["pout"].as<std::string>();
00652       if (pout_path == "-") {
00653         std::ostringstream s;
00654 
00655         if (stk::get_log_ostream(out_path))
00656           s << working_directory() << "sierra.log." << parallel_size() << "." << parallel_rank();
00657         else
00658           s << out_path << "." << parallel_size() << "." << parallel_rank();
00659         pout_path = s.str();
00660       }
00661       else if (pout_path.find("/") == std::string::npos && !stk::get_log_ostream(pout_path)) {
00662         std::ostringstream s;
00663 
00664         s << working_directory() << pout_path << "." << parallel_size() << "." << parallel_rank();
00665         pout_path = s.str();
00666       }
00667       
00668       if (!stk::get_log_ostream(pout_path)) {
00669         output_description << " poutfile=\"" << pout_path << "\"";
00670         pout_ostream = "poutfile";
00671       }
00672       else
00673         pout_ostream = pout_path;
00674     }
00675 
00676 
00677     std::string dout_ostream;    
00678     if (vm.count("dout")) {
00679       std::string dout_path = vm["dout"].as<std::string>();
00680       if (!dout_path.empty() && stk::is_registered_ostream(dout_path))
00681         dout_ostream = dout_path;
00682       else {
00683         std::ostringstream s;
00684         if (dout_path.size() && dout_path[0] != '/')
00685           s << working_directory() << dout_path << "." << parallel_size() << "." << parallel_rank();
00686         else
00687           s << dout_path << parallel_size() << "." << parallel_rank();;
00688         dout_path = s.str();
00689         output_description << " doutfile=\"" << dout_path << "\"";
00690         dout_ostream = "doutfile";
00691       }
00692     }
00693     else
00694       dout_ostream = "out";
00695 
00696     if (parallel_rank() == 0)
00697       output_description << " out>" << out_ostream << "+pout";
00698     else
00699       output_description << " out>pout";
00700 
00701     output_description << " pout>" << pout_ostream << " dout>" << dout_ostream;
00702 
00703 
00704     stk::bind_output_streams(output_description.str());
00705   }
00706   
00707   env_data.m_outputP0 = &sierra::out();
00708   
00709 #ifdef SIERRA_EXPORT_CONTROL_EAR99
00710   // If you are using an EAR99 export controlled version of Sierra,
00711   // any attempt to modify or bypass this section of code is a 
00712   // violation of U.S. Export Control Regulations and subject to
00713   // criminal prosecution.
00714   if (parallel_size() > SIERRA_EXPORT_CONTROL_EAR99) {
00715     if (parallel_rank() == 0) {
00716       std::cerr << "ERROR: You are running an EAR99 export controlled version of\n";
00717       std::cerr << "       Sierra. For this export control level, a maximum of\n";
00718       std::cerr << "       "<<SIERRA_EXPORT_CONTROL_EAR99<<" processors is permitted\n";
00719     }
00720     MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER);
00721   }
00722 #endif  
00723 
00724   try {
00725     // Create pid file if runtest command line option specified
00726     if ( !get_param("runtest").empty() ) {
00727 
00728       mpi_filebuf mpi_buf;
00729 
00730       mpi_buf.open(env_data.m_parallelComm, 0, std::ios::out, get_param("runtest").c_str());
00731 
00732       if ( ! mpi_buf.is_open() )
00733   throw RuntimeError() << "failed to open pid file " << get_param("runtest");
00734 
00735       std::ostream s( &mpi_buf );
00736       s << parallel_rank() << ":" << hostname() << domainname() << ":" << pid() << ":" << pgrp() << std::endl;
00737     }
00738 
00739     // Enable the timers
00740     if (!get_param("timer").empty()) {
00741       Diag::TimerParser parser;
00742 
00743       Diag::sierraTimerSet().setEnabledTimerMask(parser.parse(get_param("timer").c_str()));
00744     }
00745 
00746     // Enable parallel exception handling, waited until now because it needs the Env output streams
00747     register_stl_parallel_exceptions();
00748   }
00749   catch (const std::exception &x) {
00750     std::cerr << "SIERRA execution failed during diagnostic and timer initialization with the following exception:" << std::endl
00751         << x.what() << std::endl;
00752     abort();
00753   }
00754   catch (...) {
00755     std::cerr << "SIERRA execution failed during diagnostic and timer initialization with unknown exception:" << std::endl;
00756     abort();
00757   }
00758 
00759 // Setup the hangup, segmentation violation, illegal instruction, bus error and
00760 //    terminate signal handlers.
00761   if (get_param("nosignal").empty())
00762     activate_signals();
00763 
00764 
00765   return returnValue;
00766 }
00767 
00768 
00769 
00770 
00771 void
00772 Startup::startup(
00773   int *       argc,
00774   char ***      argv,
00775   const char *      product_name,
00776   const char *      build_time,
00777   ExecType                mpi_key,
00778   const std::vector<int> *peer_sizes) {
00779   m_mpiInitFlag = StartupSierra(argc, argv, product_name, build_time, mpi_key, peer_sizes);
00780 }
00781 
00782           
00783 Startup::Startup(
00784   int *                 argc,
00785   char ***              argv,
00786   const char *          product_name,
00787   const char *          build_date_time,
00788   ExecType              mpi_key,
00789   const std::vector<int> *peer_sizes)
00790   : m_mpiInitFlag(false)
00791 {
00792   startup(argc, argv, product_name, build_date_time, mpi_key, peer_sizes);
00793 }
00794 
00795 
00796 void ShutDownSierra(bool mpiInitFlag) {
00797   if (get_param("nosignal").empty())
00798     deactivate_signals();
00799 
00800   mpih::Delete_Handles();
00801 
00802   EnvData &env_data = EnvData::instance();
00803   mpih::Keyval_delete(env_data.m_parallelComm);
00804 
00805   reset(MPI_COMM_NULL);
00806 
00807   if (mpiInitFlag)
00808     MPI_Finalize();
00809 }
00810 
00811 
00812 
00813 Startup::~Startup() {
00814   ShutDownSierra(m_mpiInitFlag);
00815 }
00816 
00817 
00818 void parse_options(MPI_Comm  comm,
00819        int *     argc,
00820        char ***  argv)
00821 {
00822   try {
00823     char ** argv2 = new char *[*argc];
00824     for (int i = 0; i < *argc; ++i) {
00825       if (std::strlen((*argv)[i]) > 2 && (*argv)[i][0] == '-' && (*argv)[i][1] != '-') {
00826         argv2[i] = new char[std::strlen((*argv)[i]) + 2];
00827         argv2[i][0] = '-';
00828         std::strcpy(&argv2[i][1], (*argv)[i]);
00829       }
00830       else {  
00831         argv2[i] = new char[std::strlen((*argv)[i]) + 1]; 
00832   std::strcpy(argv2[i], (*argv)[i]);
00833       }      
00834     }
00835   
00836     // Broadcast argc and argv to all processors.
00837     stk::BroadcastArg b_arg(comm, *argc, argv2);
00838 
00839     for (int i = 0; i < *argc; ++i)
00840       delete[] argv2[i];
00841     delete[] argv2;
00842 
00843     namespace opt = boost::program_options;
00844     opt::variables_map &vm = stk::get_variables_map();
00845     opt::options_description &od = stk::get_options_description();
00846     opt::store(opt::parse_command_line(b_arg.m_argc, b_arg.m_argv, od, opt::command_line_style::unix_style), vm);
00847     opt::notify(vm);
00848 
00849     for (Diag::WriterRegistry::iterator it = Diag::getWriterRegistry().begin(); it != Diag::getWriterRegistry().end(); ++it)
00850       if (vm.count((*it).first.c_str()))
00851         (*it).second.second->parse(vm[(*it).first.c_str()].as<std::string>().c_str());
00852     
00853 
00854     // Must have a working directory
00855     const std::string &working_dir = get_param("directory");
00856     if ( working_dir.empty() || working_dir == PARAM_ON )
00857       throw RuntimeError() << "working directory must be specified";
00858     if (working_dir[working_dir.length() - 1] != '/')
00859       const_cast<std::string &>(working_dir) += '/';
00860     
00861   }
00862   catch (const std::exception &x) {
00863     std::cerr << "SIERRA execution failed during command line processing with the following exception:" << std::endl
00864         << x.what() << std::endl;
00865     MPI_Abort(comm, MPI_ERR_OTHER);
00866   }
00867   catch (...) {
00868     std::cerr << "SIERRA execution failed during command line processing with unknown exception:" << std::endl;
00869 
00870     MPI_Abort(comm, MPI_ERR_OTHER);
00871   }
00872 } 
00873 
00874 void
00875 startup_multi_exec(MPI_Comm                world_comm,
00876        ExecType                my_executable_type,
00877                    const std::vector<int> *peer_sizes)  // can be NULL.
00878 {
00879   EnvData &env_data = EnvData::instance();
00880 
00881   // MPI interface construction
00882   int world_size = -1 ;
00883   int world_rank = -1 ;
00884       
00885   if ( MPI_Comm_size(world_comm, &world_size) != MPI_SUCCESS)
00886     throw RuntimeError() << "MPI_Comm_size failed";
00887 
00888   if ( MPI_Comm_rank(world_comm, &world_rank) != MPI_SUCCESS || -1 == world_rank )
00889     throw RuntimeError() << "MPI_Comm_rank failed";
00890 
00891   if (my_executable_type == EXEC_TYPE_FLUID || my_executable_type == EXEC_TYPE_LAG) {
00892     // This is specific for gemini.  Gemini performs three broadcasts, one for the
00893     // EXEC_TYPE_FLUID and one for the EXEC_TYPE_LAG.  Also note that the ranks of processors must
00894     // be ordered such that all gemini processors come first.  Gemini mandates that it;s master is
00895     // processor 0 and use ranks through its size.
00896     int lag_master = 0;
00897     int lag_rank_size = -1;
00898     int fluid_master = 0;
00899       
00900     if (world_rank == 0) {
00901       typedef std::map<ExecType, std::vector<int> > ExecTypeRanks;
00902 
00903       ExecTypeRanks exec_type_ranks;
00904 
00905       exec_type_ranks[my_executable_type].push_back(0);
00906 
00907       for (int i = 1; i < world_size; ++i) {
00908         MPI_Status status;
00909         int proc_stat[2];         // rank, ExecType
00910         if (MPI_Recv(proc_stat, 2, MPI_INTEGER, i, MPI_ANY_TAG, world_comm, &status) != MPI_SUCCESS)
00911           throw RuntimeError() << "MPI_Recv failed";
00912 
00913         exec_type_ranks[(ExecType) proc_stat[1]].push_back(proc_stat[0]);
00914       }        
00915 
00916       std::vector<int> &fluid_ranks = exec_type_ranks[EXEC_TYPE_FLUID];
00917       if (fluid_ranks.size())
00918         fluid_master = fluid_ranks.front();
00919 
00920       if (MPI_Bcast(&fluid_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00921         throw RuntimeError() << "MPI_Bcast failed";
00922 
00923       std::vector<int> &lag_ranks = exec_type_ranks[EXEC_TYPE_LAG];      
00924       if (lag_ranks.size())
00925         lag_master = lag_ranks.front();
00926 
00927       if (MPI_Bcast(&lag_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00928         throw RuntimeError() << "MPI_Bcast failed";
00929 
00930       lag_rank_size = lag_ranks.size();
00931       if (MPI_Bcast(&lag_rank_size, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00932         throw RuntimeError() << "MPI_Bcast failed";
00933     }
00934     else {
00935       int proc_stat[2];
00936       proc_stat[0] = world_rank;
00937       proc_stat[1] = my_executable_type;
00938 
00939       if (MPI_Send(proc_stat, 2, MPI_INTEGER, 0, 0, world_comm) != MPI_SUCCESS)
00940         throw RuntimeError() << "MPI_Send failed";
00941 
00942       if (MPI_Bcast(&fluid_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00943         throw RuntimeError() << "MPI_Bcast failed";
00944 
00945       if (MPI_Bcast(&lag_master, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00946         throw RuntimeError() << "MPI_Bcast failed";
00947 
00948       if (MPI_Bcast(&lag_rank_size, 1, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
00949         throw RuntimeError() << "MPI_Bcast failed";
00950     }
00951 
00952     MPI_Comm lag_comm   = world_comm;
00953     MPI_Comm fluid_comm = MPI_COMM_NULL;
00954     const int fluid_rank_size = world_size - lag_rank_size;
00955     if (fluid_rank_size) {
00956 
00957       MPI_Group world_group;
00958       MPI_Group lag_group;
00959       MPI_Group fluid_group;
00960 
00961       if (MPI_Comm_group(world_comm, &world_group) != MPI_SUCCESS) 
00962         throw RuntimeError() << "MPI_Comm_group failed";
00963 
00964       std::vector<int> lag_ranks;
00965       for (int i = 0; i < lag_rank_size; ++i)
00966         lag_ranks.push_back(lag_master + i);
00967 
00968       if (MPI_Group_incl(world_group, lag_ranks.size(), &lag_ranks[0], &lag_group) != MPI_SUCCESS)
00969         throw RuntimeError() << "MPI_Group_incl failed";
00970       if (MPI_Comm_create(world_comm, lag_group, &lag_comm) != MPI_SUCCESS)
00971         throw RuntimeError() << "MPI_Comm_create failed";
00972 
00973       std::vector<int> fluid_ranks;
00974       for (int i = 0; i < fluid_rank_size; ++i)
00975         fluid_ranks.push_back(fluid_master + i);
00976 
00977       if (MPI_Group_incl(world_group, fluid_ranks.size(), &fluid_ranks[0], &fluid_group) != MPI_SUCCESS)
00978         throw RuntimeError() << "MPI_Group_incl failed";
00979       if (MPI_Comm_create(world_comm, fluid_group, &fluid_comm) != MPI_SUCCESS)
00980         throw RuntimeError() << "MPI_Comm_create failed";
00981     }
00982 
00983     env_data.m_worldComm                            = world_comm;
00984     env_data.m_execMap[EXEC_TYPE_LAG].m_master      = lag_master;
00985     env_data.m_execMap[EXEC_TYPE_LAG].m_groupComm   = lag_comm;
00986     env_data.m_execMap[EXEC_TYPE_FLUID].m_master    = fluid_master;
00987     env_data.m_execMap[EXEC_TYPE_FLUID].m_groupComm = fluid_comm;
00988   }
00989   else if (my_executable_type == EXEC_TYPE_PEER) {
00990     // This executable will run on 2 or more communicators.
00991 
00992     // NOTE: Only 2 communicators is currently supported...
00993 
00994     // If peer_sizes is NULL, then split world_comm into two equal
00995     // size communicators (peer(1) is larger if world_comm size is
00996     // odd)
00997     // If peer_sizes is not NULL, then split world_comm into
00998     // peer_sizes.size() sub communicators with peer(i) of size
00999     // peer_sizes(i). 
01000 
01001     // Sync 'peer_sizes' across all processors if non-null
01002     // For now, we limit the number of peer applications to 2.
01003 
01004     if (peer_sizes != NULL && peer_sizes->size() > 2) {
01005       throw RuntimeError() << "The total number of peer application processor sizes specfied is "
01006          << peer_sizes->size()
01007          << ",  but the current limit is 2.";
01008     }
01009 
01010     // Peer sizes is only set correctly on processor 0 since it was passed in by the
01011     // main routine prior to MPI_Init being called.  Broadcast the values to all processors.
01012     int peers[2];
01013     if (world_rank == 0) {
01014       if (peer_sizes != NULL) {
01015   peers[0] = (*peer_sizes)[0];
01016   peers[1] = (*peer_sizes)[1];
01017       } else {
01018   peers[0] = world_size / 2;
01019   peers[1] = world_size - world_size/2;
01020       }
01021     }
01022     if (MPI_Bcast(peers, 2, MPI_INTEGER, 0, world_comm) != MPI_SUCCESS)
01023       throw RuntimeError() << "MPI_Broadcast -- peers failed";
01024 
01025     // Check that the number of processes specified is equal to the
01026     // total number of processes
01027     int peer_proc_count = peers[0] + peers[1];
01028     if (peer_proc_count != world_size) {
01029       throw RuntimeError() << "The total number of peer processors specfied is " << peer_proc_count
01030          << " which is not equal to the total number of processors (" << world_size << ").";
01031     }
01032 
01033     int my_peer_group = MPI_UNDEFINED;
01034     int sum = 0;
01035     for (size_t i=0; i < 2; i++) {
01036       sum += peers[i];
01037       if (world_rank < sum) {
01038   my_peer_group = i;
01039   break;
01040       }
01041     }
01042 
01043     MPI_Comm peer_comm;
01044     if (MPI_Comm_split(world_comm, my_peer_group, world_rank, &peer_comm) != MPI_SUCCESS) {
01045       throw RuntimeError() << "MPI_Comm_split failed";
01046     }
01047     env_data.m_worldComm                           = world_comm;
01048     env_data.m_execMap[EXEC_TYPE_PEER].m_groupComm = peer_comm;
01049     env_data.m_execMap[EXEC_TYPE_PEER].m_master    = my_peer_group; // Overloading meaning to peer group.
01050   }
01051 }
01052 
01053 bool
01054 is_comm_valid()
01055 {
01056   EnvData &env_data = EnvData::instance();
01057   if (env_data.m_parallelComm == MPI_COMM_NULL) {
01058     return false;
01059   } else {
01060     return true;
01061   }
01062 }
01063 
01064 void
01065 reset(
01066   MPI_Comm    new_comm)
01067 {
01068   EnvData &env_data = EnvData::instance();
01069 
01070   // Destroy old comm
01071   if (env_data.m_parallelComm != MPI_COMM_NULL) {
01072 
01073     if (new_comm != MPI_COMM_NULL) {
01074       mpih::Sub_Communicator(env_data.m_parallelComm, new_comm);
01075     }
01076 
01077     env_data.m_parallelComm = MPI_COMM_NULL ;
01078     env_data.m_parallelSize = -1;
01079     env_data.m_parallelRank = -1 ;
01080   }
01081 
01082   setMpiCommunicator(new_comm);
01083 }
01084 
01085 void setMpiCommunicator(MPI_Comm communicator)
01086 {
01087     EnvData &env_data = EnvData::instance();
01088     if(communicator != MPI_COMM_NULL)
01089     {
01090         env_data.m_parallelComm = communicator;
01091 
01092         if(MPI_Comm_size(env_data.m_parallelComm, &env_data.m_parallelSize) != MPI_SUCCESS
01093            || MPI_Comm_rank(env_data.m_parallelComm, &env_data.m_parallelRank) != MPI_SUCCESS
01094            || env_data.m_parallelSize == -1
01095            || env_data.m_parallelRank == -1)
01096         {
01097             throw RuntimeError() << "reset given bad MPI communicator";
01098         }
01099     }
01100 }
01101 
01102 void
01103 output_flush()
01104 {
01105   EnvData &env_data = EnvData::instance();
01106 
01107   stk::report_deferred_messages(Env::parallel_comm());
01108   
01109   stk::all_write_string(Env::parallel_comm(), *env_data.m_outputP0, env_data.m_output.str());
01110   env_data.m_output.str("");
01111 }
01112 
01113 
01114 void
01115 request_shutdown(bool shutdown)
01116 {
01117   EnvData::instance().m_shutdownRequested = shutdown;
01118 }
01119 
01120 
01121 bool
01122 is_shutdown_requested()
01123 {
01124   int shutdown_requested_in = EnvData::instance().m_shutdownRequested || Env::HUP_received();
01125   int shutdown_requested;
01126 
01127   MPI_Allreduce(&shutdown_requested_in, &shutdown_requested, 1, MPI_INT, MPI_SUM, Env::parallel_comm());
01128 
01129   return shutdown_requested != 0;
01130 }
01131 
01132 
01133 void abort() {
01134   EnvData &env_data = EnvData::instance();
01135 
01136   // Cannot be sure of parallel synchronization status; therefore, no communications can
01137   // occur.  Grab and dump all pending output buffers to 'std::cerr'.
01138   std::cerr << std::endl
01139             << "*** SIERRA ABORT on P" << EnvData::instance().m_parallelRank << " ***"
01140             << std::endl
01141             << "*** check " << get_param("output-log")
01142             << " file for more information ***"
01143             << std::endl ;
01144 
01145   if (!env_data.m_output.str().empty()) {
01146     std::cerr << "Buffer contents of deferred output stream on processor " << parallel_rank()
01147               << std::endl ;
01148     std::cerr << env_data.m_output.str();
01149   }
01150   
01151   std::cerr.flush();
01152   std::cout.flush();
01153 
01154   ::sleep(1);         // Give the other processors a chance at
01155             // catching up, seems to help hanging problems.
01156   MPI_Abort(env_data.m_parallelComm, MPI_ERR_OTHER);  // First try to die
01157   std::exit( EXIT_FAILURE );                    // Second try to die
01158 }
01159 
01160 
01161 const std::string &
01162 get_param(
01163   const char * const  option)
01164 {
01165   if (EnvData::instance().m_vm.count(option)) {
01166     if (EnvData::instance().m_vm[option].as<std::string>().empty())
01167       return EnvData::instance().m_onString;
01168     else
01169       return EnvData::instance().m_vm[option].as<std::string>();
01170   }
01171   else
01172     return EnvData::instance().m_emptyString;
01173 }
01174 
01175 
01176 void
01177 set_param(
01178   const char *          option,
01179   const std::string &   value) {
01180 
01181 
01182   namespace opt = boost::program_options;
01183 
01184   opt::variables_map &vm = stk::get_variables_map();
01185   opt::options_description &od = stk::get_options_description();
01186 
01187   int argc = 1;
01188   char *s = std::strcpy(new char[std::strlen(option) + 1], option);
01189   
01190   opt::store(opt::parse_command_line(argc, &s, od), vm);
01191   opt::notify(vm);
01192 
01193   delete [] s;
01194 }
01195 
01196 } // namespace Env
01197 } // namespace sierra
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines