00001 // @HEADER 00002 // @HEADER 00003 00004 #ifndef Playa_ERRORPOLLING_H 00005 #define Playa_ERRORPOLLING_H 00006 00007 #include "Teuchos_ConfigDefs.hpp" 00008 #include "Teuchos_Assert.hpp" 00009 00010 /*! \defgroup ErrorPolling_grp Utility code for synchronizing std::exception detection across processors. 00011 */ 00012 //@{ 00013 00014 namespace Playa 00015 { 00016 class MPIComm; 00017 00018 /** \brief ErrorPolling provides utilities for establishing agreement 00019 * between processors on whether an std::exception has been detected on any one 00020 * processor. 00021 * 00022 * The two functions must be used in a coordinated way. The simplest use 00023 * case is to embed a call to reportFailure() whenever an std::exception is 00024 * detected at the top-level try/catch block, and then to do a call to 00025 * pollForFailures() whenever it is desired to check for off-processor 00026 * errors before proceeding. The macro 00027 00028 \code 00029 TEUCHOS_TEST_FOR_FAILURE(comm); 00030 \endcode 00031 00032 * calls pollForFailures() and throws an std::exception if the return value is 00033 * true. 00034 * 00035 * Polling is a collective operation (an MPI_Reduce) and so incurs some 00036 * performance overhead. It can be disabled with a call to 00037 * \code 00038 * Teuchos::ErrorPolling::disable(); 00039 * \endcode 00040 * IMPORTANT: all processors must agree on whether collective error checking 00041 * is enabled or disabled. If there are inconsistent states, the reduction 00042 * operations in pollForFailures() will hang because some processors cannot be 00043 * contacted. 00044 */ 00045 class TEUCHOS_LIB_DLL_EXPORT ErrorPolling 00046 { 00047 public: 00048 /** Call this function upon catching an std::exception in order to 00049 * inform other processors of the error. This function will do an 00050 * AllReduce in conjunction with calls to either this function or 00051 * its partner, pollForFailures(), on the other processors. This 00052 * procedure has the effect of communicating to the other 00053 * processors that an std::exception has been detected on this one. */ 00054 static void reportFailure(const MPIComm& comm); 00055 00056 /** Call this function after std::exception-free completion of a 00057 * try/catch block. This function will do an AllReduce in 00058 * conjunction with calls to either this function or its partner, 00059 * reportFailure(), on the other processors. If a failure has been 00060 * reported by another processor, the call to pollForFailures() 00061 * will return true and an std::exception can be thrown. */ 00062 static bool pollForFailures(const MPIComm& comm); 00063 00064 /** Activate error polling */ 00065 static void enable() {isActive()=true;} 00066 00067 /** Disable error polling */ 00068 static void disable() {isActive()=false;} 00069 00070 private: 00071 /** Set or check whether error polling is active */ 00072 static bool& isActive() {static bool rtn = true; return rtn;} 00073 }; 00074 00075 /** 00076 * This macro polls all processors in the given communicator to find 00077 * out whether an error has been reported by a call to 00078 * ErrorPolling::reportFailure(comm). 00079 * 00080 * @param comm [in] The communicator on which polling will be done 00081 */ 00082 #define TEUCHOS_POLL_FOR_FAILURES(comm) \ 00083 TEUCHOS_TEST_FOR_EXCEPTION(Playa::ErrorPolling::pollForFailures(comm), \ 00084 std::runtime_error, \ 00085 "off-processor error detected by proc=" << (comm).getRank()); 00086 } 00087 00088 //@} 00089 00090 #endif