ENH: improve robustness of MPI start/stop

- warn or raise a fatal error if Pstream::init or Pstream::exit is
  called multiple times.

- additional Pstream::initNull method as a failsafe to initialize MPI
  when the underlying OpenFOAM process is not running in parallel but
  the application still needs MPI.

- Pstream::exit() can now also be called without having used MPI::init(),
  which means it can be used to clean up a serial process or for
  applications that used the special-purpose Pstream::initNull()
  mechanism.
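As a usage sketch (not part of this commit; the utility and its
third-party MPI call are hypothetical), a serial OpenFOAM application
that still needs an MPI context could now be written as:

    // Hypothetical serial utility that needs MPI for an external library
    #include "Pstream.H"

    int main(int argc, char *argv[])
    {
        // Basic MPI_Init only; behaves as a no-op if MPI is already
        // initialized, fatal if MPI has already been finalized.
        Foam::Pstream::initNull();

        // ... call into an MPI-based third-party library here ...

        // Safe even though Pstream::init() was never called:
        // finalizes MPI if required and exits the process.
        Foam::Pstream::exit(0);
    }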
Author: Mark Olesen
Date:   2017-11-17 11:29:26 +01:00
Parent: 00b1ecad60
Commit: 9c3bef5a99
5 changed files with 136 additions and 27 deletions

View File

@@ -69,6 +69,7 @@ public:
nonBlocking
};
//- Names of the communication types
static const Enum<commsTypes> commsTypeNames;
// Public classes
@@ -87,8 +88,8 @@ public:
//- procIDs of all processors below (so not just directly below)
labelList allBelow_;
//- procIDs of all processors not below. (inverse set of
// allBelow_ and minus myProcNo)
//- procIDs of all processors not below.
// (inverse set of allBelow_ and minus myProcNo)
labelList allNotBelow_;
@@ -102,10 +103,10 @@ public:
//- Construct from components
commsStruct
(
const label,
const labelList&,
const labelList&,
const labelList&
const label above,
const labelList& below,
const labelList& allBelow,
const labelList& allNotBelow
);
//- Construct from components; construct allNotBelow_
@@ -113,9 +114,9 @@ public:
(
const label nProcs,
const label myProcID,
const label,
const labelList&,
const labelList&
const label above,
const labelList& below,
const labelList& allBelow
);
@@ -255,12 +256,12 @@ public:
// Static data
//- Should compact transfer be used in which floats replace doubles
// reducing the bandwidth requirement at the expense of some loss
// in accuracy
//- reducing the bandwidth requirement at the expense of some loss
//- in accuracy
static bool floatTransfer;
//- Number of processors at which the sum algorithm changes from linear
// to tree
//- to tree
static int nProcsSimpleSum;
//- Default commsType
@@ -344,15 +345,15 @@ public:
};
//- Return physical processor number (i.e. processor number in
// worldComm) given communicator and procssor
//- worldComm) given communicator and procssor
static int baseProcNo(const label myComm, const int procID);
//- Return processor number in communicator (given physical processor
// number) (= reverse of baseProcNo)
//- number) (= reverse of baseProcNo)
static label procNo(const label comm, const int baseProcID);
//- Return processor number in communicator (given processor number
// and communicator)
//- and communicator)
static label procNo
(
const label myComm,
@@ -361,13 +362,23 @@ public:
);
//- Add the valid option this type of communications library
// adds/requires on the command line
//- adds/requires on the command line
static void addValidParOptions(HashTable<string>& validParOptions);
//- Initialisation function called from main
// Spawns slave processes and initialises inter-communication
// Spawns slave processes and initialises inter-communication.
// \note warns if MPI has already been initialized.
// Fatal if MPI has already been finalized.
static bool init(int& argc, char**& argv);
//- Special purpose initialisation function.
// Performs a basic MPI_Init without any other setup.
// Only used for applications that need MPI communication when
// OpenFOAM is running in a non-parallel mode.
// \note Behaves as a no-op if MPI has already been initialized.
// Fatal if MPI has already been finalized.
static bool initNull();
// Non-blocking comms
//- Get number of outstanding requests
@@ -401,7 +412,7 @@ public:
}
//- Set data for parallel running. Special case nProcs=0 to switch off
// parallel running
//- parallel running
static void setParRun(const label nProcs);
//- Number of processes in parallel run

View File

@@ -122,8 +122,8 @@ class argList
//- Track enabled/disabled checking of processor directories state
static bool checkProcessorDirectories_;
//- Switch on/off parallel mode. Has to be first to be constructed
// so destructor is done last.
//- Switch on/off parallel mode.
// Must be first to be constructed so destructor is done last.
ParRunControl parRunControl_;
//- The arguments after removing known options

View File

@@ -27,6 +27,9 @@ Class
Description
Helper class for initializing parallel jobs from the command arguments.
This class also handles cleanup of parallel or serial jobs in a
uniform manner.
\*---------------------------------------------------------------------------*/
#ifndef parRun_H
@@ -60,10 +63,13 @@ public:
if (RunPar)
{
Info<< "Finalising parallel run" << endl;
Pstream::exit(0);
}
// Handles serial and parallel modes.
Pstream::exit(0);
}
//- Initialize Pstream for a parallel run
void runPar(int& argc, char**& argv)
{
RunPar = true;
@@ -75,6 +81,8 @@ public:
}
}
//- Is this a parallel run?
bool parRun() const
{
return RunPar;

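The practical effect for standard solvers and utilities (a conceptual
sketch of the usual application skeleton, not code from this commit) is
that serial and parallel runs now share the same cleanup path when the
argument list goes out of scope:

    #include "argList.H"

    int main(int argc, char *argv[])
    {
        // argList constructs its ParRunControl member first,
        // so that member is destroyed last.
        Foam::argList args(argc, argv);

        // ... solver or utility code ...

        return 0;
        // leaving main(): ~argList -> ~ParRunControl -> Pstream::exit(0),
        // which now handles both serial and parallel modes.
    }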
View File

@@ -25,18 +25,28 @@ License
#include "Pstream.H"
#include "PstreamReduceOps.H"
#include "OSspecific.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
{}
bool Foam::UPstream::initNull()
{
WarningInFunction
<< "The dummy Pstream library cannot be used in parallel mode"
<< endl;
return false;
}
bool Foam::UPstream::init(int& argc, char**& argv)
{
FatalErrorInFunction
<< "Trying to use the dummy Pstream library." << nl
<< "This dummy library cannot be used in parallel mode"
<< "The dummy Pstream library cannot be used in parallel mode"
<< endl
<< Foam::exit(FatalError);
return false;
@@ -45,13 +55,15 @@ bool Foam::UPstream::init(int& argc, char**& argv)
void Foam::UPstream::exit(int errnum)
{
NotImplemented;
// No MPI - just exit
::exit(errnum);
}
void Foam::UPstream::abort()
{
NotImplemented;
// No MPI - just abort
::abort();
}

View File

@@ -66,8 +66,67 @@ void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
}
bool Foam::UPstream::initNull()
{
int flag = 0;
MPI_Finalized(&flag);
if (flag)
{
// Already finalized - this is an error
FatalErrorInFunction
<< "MPI was already finalized - cannot perform MPI_Init" << endl
<< Foam::abort(FatalError);
return false;
}
MPI_Initialized(&flag);
if (flag)
{
// Already initialized - nothing to do
return true;
}
MPI_Init_thread
(
nullptr, // argc
nullptr, // argv
MPI_THREAD_SINGLE,
&flag // provided_thread_support
);
return true;
}
bool Foam::UPstream::init(int& argc, char**& argv)
{
int flag = 0;
MPI_Finalized(&flag);
if (flag)
{
// Already finalized - this is an error
FatalErrorInFunction
<< "MPI was already finalized - cannot perform MPI_Init" << endl
<< Foam::abort(FatalError);
return false;
}
MPI_Initialized(&flag);
if (flag)
{
// Already initialized - issue warning and skip the rest
WarningInFunction
<< "MPI was already initialized - cannot perform MPI_Init" << nl
<< "This could indicate an application programming error!" << endl;
return true;
}
//MPI_Init(&argc, &argv);
int provided_thread_support;
MPI_Init_thread
@@ -92,8 +151,7 @@ bool Foam::UPstream::init(int& argc, char**& argv)
if (numprocs <= 1)
{
FatalErrorInFunction
<< "bool IPstream::init(int& argc, char**& argv) : "
"attempt to run parallel on 1 processor"
<< "attempt to run parallel on 1 processor"
<< Foam::abort(FatalError);
}
@@ -148,6 +206,26 @@ void Foam::UPstream::exit(int errnum)
Pout<< "UPstream::exit." << endl;
}
int flag = 0;
MPI_Initialized(&flag);
if (!flag)
{
// Not initialized - just exit
::exit(errnum);
return;
}
MPI_Finalized(&flag);
if (flag)
{
// Already finalized
FatalErrorInFunction
<< "MPI was already finalized" << endl
<< Foam::abort(FatalError);
return;
}
#ifndef SGIMPI
{
int size;