ENH: extend parProfiling (#2737)
- separate broadcast times from reduce/gather/scatter time
- separate wait times from all-to-all time
- support invocation counts; split off request time/count from the others to avoid flooding the counts
- support a 'detail' switch to increase the amount of output information. The output format may change in the future
This commit is contained in:
parent
475ed5cc32
commit
9577a0f6b5
23
etc/caseDicts/profiling/parallel.cfg
Normal file
23
etc/caseDicts/profiling/parallel.cfg
Normal file
@ -0,0 +1,23 @@
|
||||
/*--------------------------------*- C++ -*----------------------------------*\
|
||||
========= |
|
||||
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
|
||||
\\ / O peration | Version: v2306
|
||||
\\ / A nd | Website: www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Description
|
||||
Configuration for profiling parallel (MPI) timings
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
type parProfiling;
|
||||
libs (utilityFunctionObjects);
|
||||
|
||||
// Level of detail to report
|
||||
detail 0;
|
||||
|
||||
// Report stats on exit only (instead of every time step)
|
||||
executeControl onEnd;
|
||||
writeControl none;
|
||||
|
||||
// ************************************************************************* //
|
@ -5,7 +5,7 @@
|
||||
\\ / A nd | www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2019-2020 OpenCFD Ltd.
|
||||
Copyright (C) 2019-2023 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -31,10 +31,11 @@ License
|
||||
|
||||
std::unique_ptr<Foam::cpuTime> Foam::profilingPstream::timer_(nullptr);
|
||||
|
||||
Foam::profilingPstream::timingList Foam::profilingPstream::times_(Zero);
|
||||
|
||||
bool Foam::profilingPstream::suspend_(false);
|
||||
|
||||
Foam::profilingPstream::timingList Foam::profilingPstream::times_(double(0));
|
||||
Foam::profilingPstream::countList Foam::profilingPstream::counts_(uint64_t(0));
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
|
||||
|
||||
@ -52,7 +53,7 @@ Foam::profilingPstream::~profilingPstream()
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
// * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
|
||||
|
||||
void Foam::profilingPstream::enable()
|
||||
{
|
||||
@ -63,7 +64,8 @@ void Foam::profilingPstream::enable()
|
||||
else
|
||||
{
|
||||
timer_.reset(new cpuTime);
|
||||
times_ = Zero;
|
||||
times_ = double(0);
|
||||
counts_ = uint64_t(0);
|
||||
}
|
||||
|
||||
suspend_ = false;
|
||||
@ -77,4 +79,16 @@ void Foam::profilingPstream::disable() noexcept
|
||||
}
|
||||
|
||||
|
||||
double Foam::profilingPstream::elapsedTime()
|
||||
{
|
||||
double total = 0;
|
||||
for (const double val : times_)
|
||||
{
|
||||
total += val;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
||||
|
@ -5,7 +5,7 @@
|
||||
\\ / A nd | www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2019-2022 OpenCFD Ltd.
|
||||
Copyright (C) 2019-2023 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -57,20 +57,26 @@ public:
|
||||
|
||||
// Public Types
|
||||
|
||||
//- Enumeration within times array
|
||||
enum timingType
|
||||
//- The enumerated timing categories (for times and counts arrays)
|
||||
enum timingType : unsigned
|
||||
{
|
||||
GATHER = 0,
|
||||
SCATTER,
|
||||
BROADCAST,
|
||||
BROADCAST = 0,
|
||||
REDUCE,
|
||||
PROBE,
|
||||
REQUEST,
|
||||
WAIT,
|
||||
GATHER,
|
||||
SCATTER,
|
||||
ALL_TO_ALL,
|
||||
OTHER
|
||||
OTHER,
|
||||
nCategories // Dimensioning size
|
||||
};
|
||||
|
||||
//- The timing values
|
||||
typedef FixedList<double, 7> timingList;
|
||||
//- Fixed-size container for timing values
|
||||
typedef FixedList<double, timingType::nCategories> timingList;
|
||||
|
||||
//- Fixed-size container for timing counts
|
||||
typedef FixedList<uint64_t, timingType::nCategories> countList;
|
||||
|
||||
|
||||
private:
|
||||
@ -80,12 +86,15 @@ private:
|
||||
//- The timer to use
|
||||
static std::unique_ptr<cpuTime> timer_;
|
||||
|
||||
//- The timing values
|
||||
static timingList times_;
|
||||
|
||||
//- Is timer in a suspend state?
|
||||
static bool suspend_;
|
||||
|
||||
//- The accumulated values for various timing categories
|
||||
static timingList times_;
|
||||
|
||||
//- The invocation counts for the various timing categories
|
||||
static countList counts_;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
@ -125,18 +134,27 @@ public:
|
||||
return !suspend_ && bool(timer_);
|
||||
}
|
||||
|
||||
//- Access to the timing information
|
||||
static timingList& times() noexcept
|
||||
{
|
||||
return times_;
|
||||
}
|
||||
//- The total of times
|
||||
static double elapsedTime();
|
||||
|
||||
//- Access to the timing information at given index
|
||||
//- Access to the timing information
|
||||
static timingList& times() noexcept { return times_; }
|
||||
|
||||
//- Access to the timing counts
|
||||
static countList& counts() noexcept { return counts_; }
|
||||
|
||||
//- Access to the timing information for given timing category
|
||||
static double times(const timingType idx)
|
||||
{
|
||||
return times_[idx];
|
||||
}
|
||||
|
||||
//- Access to the count for given timing category
|
||||
static uint64_t counts(const timingType idx)
|
||||
{
|
||||
return counts_[idx];
|
||||
}
|
||||
|
||||
//- Update timer prior to measurement
|
||||
static void beginTiming()
|
||||
{
|
||||
@ -152,21 +170,10 @@ public:
|
||||
if (active())
|
||||
{
|
||||
times_[idx] += timer_->cpuTimeIncrement();
|
||||
++counts_[idx];
|
||||
}
|
||||
}
|
||||
|
||||
//- Add time increment to \em gather time
|
||||
static void addGatherTime()
|
||||
{
|
||||
addTime(timingType::GATHER);
|
||||
}
|
||||
|
||||
//- Add time increment to \em scatter time
|
||||
static void addScatterTime()
|
||||
{
|
||||
addTime(timingType::SCATTER);
|
||||
}
|
||||
|
||||
//- Add time increment to \em broadcast time
|
||||
static void addBroadcastTime()
|
||||
{
|
||||
@ -179,12 +186,36 @@ public:
|
||||
addTime(timingType::REDUCE);
|
||||
}
|
||||
|
||||
//- Add time increment to \em probe time
|
||||
static void addProbeTime()
|
||||
{
|
||||
addTime(timingType::PROBE);
|
||||
}
|
||||
|
||||
//- Add time increment to \em request time
|
||||
static void addRequestTime()
|
||||
{
|
||||
addTime(timingType::REQUEST);
|
||||
}
|
||||
|
||||
//- Add time increment to \em wait time
|
||||
static void addWaitTime()
|
||||
{
|
||||
addTime(timingType::WAIT);
|
||||
}
|
||||
|
||||
//- Add time increment to \em gather time
|
||||
static void addGatherTime()
|
||||
{
|
||||
addTime(timingType::GATHER);
|
||||
}
|
||||
|
||||
//- Add time increment to \em scatter time
|
||||
static void addScatterTime()
|
||||
{
|
||||
addTime(timingType::SCATTER);
|
||||
}
|
||||
|
||||
//- Add time increment to \em allToAll time
|
||||
static void addAllToAllTime()
|
||||
{
|
||||
|
@ -61,8 +61,7 @@ void Foam::UIPstream::bufferIPCrecv()
|
||||
);
|
||||
MPI_Get_count(&status, MPI_BYTE, &messageSize_);
|
||||
|
||||
// Assume these are from gathers ...
|
||||
profilingPstream::addGatherTime();
|
||||
profilingPstream::addProbeTime();
|
||||
|
||||
recvBuf_.resize(messageSize_);
|
||||
|
||||
@ -206,7 +205,7 @@ Foam::label Foam::UIPstream::read
|
||||
return 0;
|
||||
}
|
||||
|
||||
profilingPstream::addWaitTime();
|
||||
profilingPstream::addRequestTime();
|
||||
|
||||
if (debug)
|
||||
{
|
||||
|
@ -179,7 +179,7 @@ bool Foam::UOPstream::write
|
||||
);
|
||||
}
|
||||
|
||||
profilingPstream::addWaitTime();
|
||||
profilingPstream::addRequestTime();
|
||||
|
||||
if (debug)
|
||||
{
|
||||
|
@ -822,6 +822,8 @@ Foam::UPstream::probeMessage
|
||||
if (UPstream::commsTypes::blocking == commsType)
|
||||
{
|
||||
// Blocking
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if
|
||||
(
|
||||
MPI_Probe
|
||||
@ -837,11 +839,15 @@ Foam::UPstream::probeMessage
|
||||
<< "MPI_Probe returned with error"
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
|
||||
profilingPstream::addProbeTime();
|
||||
flag = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Non-blocking
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if
|
||||
(
|
||||
MPI_Iprobe
|
||||
@ -858,6 +864,8 @@ Foam::UPstream::probeMessage
|
||||
<< "MPI_Iprobe returned with error"
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
|
||||
if (flag)
|
||||
|
@ -158,14 +158,14 @@ void Foam::PstreamDetail::allReduce
|
||||
error::printStack(Pout);
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -198,11 +198,15 @@ void Foam::PstreamDetail::allReduce
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -224,9 +228,9 @@ void Foam::PstreamDetail::allReduce
|
||||
<< UList<Type>(values, count)
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addReduceTime();
|
||||
profilingPstream::addReduceTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -283,14 +287,14 @@ void Foam::PstreamDetail::allToAll
|
||||
return;
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -326,11 +330,15 @@ void Foam::PstreamDetail::allToAll
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -355,9 +363,9 @@ void Foam::PstreamDetail::allToAll
|
||||
<< " For " << sendData
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addAllToAllTime();
|
||||
profilingPstream::addAllToAllTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -438,14 +446,14 @@ void Foam::PstreamDetail::allToAllv
|
||||
return;
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -482,11 +490,15 @@ void Foam::PstreamDetail::allToAllv
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -512,9 +524,10 @@ void Foam::PstreamDetail::allToAllv
|
||||
<< " recvCounts " << recvCounts
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
|
||||
profilingPstream::addAllToAllTime();
|
||||
}
|
||||
|
||||
profilingPstream::addAllToAllTime();
|
||||
}
|
||||
|
||||
|
||||
@ -929,14 +942,14 @@ void Foam::PstreamDetail::gather
|
||||
error::printStack(Pout);
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -972,11 +985,15 @@ void Foam::PstreamDetail::gather
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -1001,9 +1018,9 @@ void Foam::PstreamDetail::gather
|
||||
<< " recvCount " << recvCount
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addGatherTime();
|
||||
profilingPstream::addGatherTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1055,14 +1072,14 @@ void Foam::PstreamDetail::scatter
|
||||
error::printStack(Pout);
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -1098,11 +1115,15 @@ void Foam::PstreamDetail::scatter
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -1127,9 +1148,9 @@ void Foam::PstreamDetail::scatter
|
||||
<< " recvCount " << recvCount
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addScatterTime();
|
||||
profilingPstream::addScatterTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1200,8 +1221,6 @@ void Foam::PstreamDetail::gatherv
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
// Ensure send/recv consistency on master
|
||||
if (UPstream::master(comm) && !recvCounts[0])
|
||||
{
|
||||
@ -1214,6 +1233,8 @@ void Foam::PstreamDetail::gatherv
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -1250,11 +1271,15 @@ void Foam::PstreamDetail::gatherv
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -1280,9 +1305,9 @@ void Foam::PstreamDetail::gatherv
|
||||
<< " recvCounts " << recvCounts
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addGatherTime();
|
||||
profilingPstream::addGatherTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1352,14 +1377,14 @@ void Foam::PstreamDetail::scatterv
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
bool handled(false);
|
||||
|
||||
#if defined(MPI_VERSION) && (MPI_VERSION >= 3)
|
||||
// MPI-3 : eg, openmpi-1.7 (2013) and later
|
||||
if (immediate)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
handled = true;
|
||||
MPI_Request request;
|
||||
|
||||
@ -1396,11 +1421,15 @@ void Foam::PstreamDetail::scatterv
|
||||
{
|
||||
*requestID = PstreamGlobals::push_request(request);
|
||||
}
|
||||
|
||||
profilingPstream::addRequestTime();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!handled)
|
||||
{
|
||||
profilingPstream::beginTiming();
|
||||
|
||||
if (req) req->reset();
|
||||
if (requestID) *requestID = -1;
|
||||
|
||||
@ -1426,9 +1455,9 @@ void Foam::PstreamDetail::scatterv
|
||||
<< " sendOffsets " << sendOffsets
|
||||
<< Foam::abort(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
profilingPstream::addScatterTime();
|
||||
profilingPstream::addScatterTime();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
\\ / A nd | www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2019-2022 OpenCFD Ltd.
|
||||
Copyright (C) 2019-2023 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -61,8 +61,10 @@ Foam::functionObjects::parProfiling::parProfiling
|
||||
const dictionary& dict
|
||||
)
|
||||
:
|
||||
functionObject(name)
|
||||
functionObject(name),
|
||||
detailLevel_(0)
|
||||
{
|
||||
dict.readIfPresent("detail", detailLevel_);
|
||||
profilingPstream::enable();
|
||||
}
|
||||
|
||||
@ -75,121 +77,365 @@ Foam::functionObjects::parProfiling::~parProfiling()
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
|
||||
|
||||
void Foam::functionObjects::parProfiling::report()
|
||||
namespace Foam
|
||||
{
|
||||
if (!profilingPstream::active())
|
||||
|
||||
// Loop over all values (with striding) and extract the value at given index
|
||||
template<class Type>
|
||||
inline static void extractValues
|
||||
(
|
||||
UList<Type>& result,
|
||||
const int index,
|
||||
const UList<Type>& allValues
|
||||
)
|
||||
{
|
||||
if (result.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// (Time, Processor) for each of: min/max/sum
|
||||
typedef FixedList<Tuple2<double, int>, 3> statData;
|
||||
typedef FixedList<statData, 3> statDataTimes;
|
||||
const label numProc = result.size();
|
||||
const Type* values = allValues.cbegin();
|
||||
const label stride = allValues.size() / numProc;
|
||||
|
||||
// Reduction: if x and y are unequal assign value.
|
||||
auto statsEqOp = [](statDataTimes& xStats, const statDataTimes& yStats)
|
||||
if (!values || !stride)
|
||||
{
|
||||
forAll(xStats, i)
|
||||
result = Type(0);
|
||||
return;
|
||||
}
|
||||
|
||||
for (label proci = 0; proci < numProc; ++proci, values += stride)
|
||||
{
|
||||
result[proci] = values[index];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Loop over all values (with striding) and extract combined value
|
||||
// using the given unary function
|
||||
template<class Type, class Extract>
|
||||
inline static void extractValues
|
||||
(
|
||||
UList<Type>& result,
|
||||
const UList<Type>& allValues,
|
||||
const Extract& extract
|
||||
)
|
||||
{
|
||||
if (result.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const label numProc = result.size();
|
||||
const Type* values = allValues.cbegin();
|
||||
const label stride = allValues.size() / numProc;
|
||||
|
||||
if (!values || !stride)
|
||||
{
|
||||
result = Type(0);
|
||||
return;
|
||||
}
|
||||
|
||||
for (label proci = 0; proci < numProc; ++proci, values += stride)
|
||||
{
|
||||
result[proci] = extract(values);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline static void printTimingDetail(const UList<double>& values)
|
||||
{
|
||||
const label numProc = values.size();
|
||||
|
||||
if (numProc)
|
||||
{
|
||||
Info<< indent << " times " << numProc << '(';
|
||||
|
||||
for (label proci = 0; proci < numProc; ++proci)
|
||||
{
|
||||
statData& x = xStats[i];
|
||||
const statData& y = yStats[i];
|
||||
|
||||
// 0: min, 1: max, 2: total (or avg)
|
||||
if (x[0].first() > y[0].first())
|
||||
{
|
||||
x[0] = y[0];
|
||||
}
|
||||
if (x[1].first() < y[1].first())
|
||||
{
|
||||
x[1] = y[1];
|
||||
}
|
||||
x[2].first() += y[2].first();
|
||||
if (proci) Info<< ' ';
|
||||
Info<< values[proci];
|
||||
}
|
||||
};
|
||||
|
||||
statDataTimes times;
|
||||
Info<< ')' << nl;
|
||||
}
|
||||
}
|
||||
|
||||
// Master time
|
||||
|
||||
inline static void printTimingDetail(const UList<uint64_t>& values)
|
||||
{
|
||||
const label numProc = values.size();
|
||||
|
||||
if (numProc)
|
||||
{
|
||||
const double total =
|
||||
(
|
||||
profilingPstream::times(profilingPstream::REDUCE)
|
||||
+ profilingPstream::times(profilingPstream::GATHER)
|
||||
+ profilingPstream::times(profilingPstream::SCATTER)
|
||||
// Include broadcast with reduce instead of all-to-all
|
||||
+ profilingPstream::times(profilingPstream::BROADCAST)
|
||||
);
|
||||
// Output via std::ostream to avoid conversion to Foam::label
|
||||
// that Ostream performs
|
||||
|
||||
times[0] = Tuple2<double, int>(total, Pstream::myProcNo());
|
||||
auto& os = Info.stdStream();
|
||||
|
||||
Info<< indent << " counts " << numProc << '(';
|
||||
|
||||
for (label proci = 0; proci < numProc; ++proci)
|
||||
{
|
||||
if (proci) os << ' ';
|
||||
os << values[proci];
|
||||
}
|
||||
|
||||
Info<< ')' << nl;
|
||||
}
|
||||
}
|
||||
|
||||
} // End namespace Foam
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
void Foam::functionObjects::parProfiling::report()
|
||||
{
|
||||
const label numProc = (UPstream::parRun() ? UPstream::nProcs() : 1);
|
||||
|
||||
if (!profilingPstream::active() || numProc < 2)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// All time
|
||||
{
|
||||
const double total =
|
||||
(
|
||||
profilingPstream::times(profilingPstream::WAIT)
|
||||
+ profilingPstream::times(profilingPstream::ALL_TO_ALL)
|
||||
+ profilingPstream::times(profilingPstream::OTHER)
|
||||
);
|
||||
// Use mpiGather on all values and perform the combinations
|
||||
// and statistics locally. This reduces the overall number of MPI
|
||||
// calls. For detailed output we need this information anyhow.
|
||||
|
||||
times[1] = Tuple2<double, int>(total, Pstream::myProcNo());
|
||||
}
|
||||
// NB: profilingPstream uses a FixedList for times(), counts()
|
||||
// so the sizes are guaranteed to be consistent and identical
|
||||
// everywhere.
|
||||
|
||||
// Other time
|
||||
{
|
||||
const double total =
|
||||
(
|
||||
profilingPstream::times(profilingPstream::OTHER)
|
||||
);
|
||||
|
||||
times[2] = Tuple2<double, int>(total, Pstream::myProcNo());
|
||||
}
|
||||
List<double> allTimes;
|
||||
List<uint64_t> allCounts;
|
||||
|
||||
// Avoid disturbing the counts
|
||||
profilingPstream::suspend();
|
||||
|
||||
Pstream::combineGather(times, statsEqOp);
|
||||
{
|
||||
// The timings
|
||||
const auto& procTimes = profilingPstream::times();
|
||||
|
||||
if (Pstream::master())
|
||||
{
|
||||
allTimes.resize(numProc * procTimes.size());
|
||||
}
|
||||
|
||||
UPstream::mpiGather
|
||||
(
|
||||
procTimes.cdata_bytes(), // Send
|
||||
procTimes.size_bytes(), // Num send per proc
|
||||
allTimes.data_bytes(), // Recv
|
||||
procTimes.size_bytes(), // Num recv per proc
|
||||
UPstream::commWorld()
|
||||
);
|
||||
}
|
||||
|
||||
if (detailLevel_ > 1)
|
||||
{
|
||||
// The counts
|
||||
const auto& procCounts = profilingPstream::counts();
|
||||
|
||||
if (Pstream::master())
|
||||
{
|
||||
allCounts.resize(numProc * procCounts.size());
|
||||
}
|
||||
|
||||
UPstream::mpiGather
|
||||
(
|
||||
procCounts.cdata_bytes(), // Send
|
||||
procCounts.size_bytes(), // Num send per proc
|
||||
allCounts.data_bytes(), // Recv
|
||||
procCounts.size_bytes(), // Num recv per proc
|
||||
UPstream::commWorld()
|
||||
);
|
||||
}
|
||||
|
||||
profilingPstream::resume();
|
||||
|
||||
|
||||
// (Time, Processor) for each of: min/max/sum(avg)
|
||||
typedef FixedList<Tuple2<double, int>, 3> statData;
|
||||
|
||||
// Extract min/max/average
|
||||
auto calcStats = [](const UList<double>& data) -> statData
|
||||
{
|
||||
statData stats;
|
||||
stats = Tuple2<double, int>((data.empty() ? 0 : data[0]), 0);
|
||||
|
||||
const label np = data.size();
|
||||
for (label proci = 1; proci < np; ++proci)
|
||||
{
|
||||
Tuple2<double, int> tup(data[proci], proci);
|
||||
|
||||
// 0: min, 1: max, 2: total(avg)
|
||||
if (stats[0].first() > tup.first()) stats[0] = tup;
|
||||
if (stats[1].first() < tup.first()) stats[1] = tup;
|
||||
stats[2].first() += tup.first();
|
||||
}
|
||||
|
||||
// From total -> average value
|
||||
if (np) { stats[2].first() /= np; }
|
||||
|
||||
return stats;
|
||||
};
|
||||
|
||||
|
||||
const auto printTimingStats =
|
||||
[&](Ostream& os, const char* tag, const statData& stats)
|
||||
{
|
||||
os << indent << tag << ": avg = " << stats[2].first()
|
||||
<< ", min = " << stats[0].first()
|
||||
<< " (proc " << stats[0].second() << ')'
|
||||
<< ", max = " << stats[1].first()
|
||||
<< " (proc " << stats[1].second() << ')'
|
||||
<< nl;
|
||||
};
|
||||
|
||||
|
||||
if (Pstream::master())
|
||||
{
|
||||
statData stats;
|
||||
List<double> extractedTimes(numProc);
|
||||
List<uint64_t> extractedCounts;
|
||||
|
||||
if (detailLevel_ > 1)
|
||||
{
|
||||
extractedCounts.resize(numProc);
|
||||
}
|
||||
|
||||
Info<< type() << ':' << nl
|
||||
<< incrIndent;
|
||||
|
||||
// Total times
|
||||
{
|
||||
const statData& stats = times[0];
|
||||
double avg = stats[2].first()/Pstream::nProcs();
|
||||
extractValues
|
||||
(
|
||||
extractedTimes,
|
||||
allTimes,
|
||||
[=](const double values[])
|
||||
{
|
||||
double total = 0;
|
||||
for (unsigned i = 0; i < profilingPstream::nCategories; ++i)
|
||||
{
|
||||
total += values[i];
|
||||
}
|
||||
return total;
|
||||
}
|
||||
);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
Info<< indent << "reduce : avg = " << avg << 's' << nl
|
||||
<< indent << " min = " << stats[0].first()
|
||||
<< "s (processor " << stats[0].second() << ')' << nl
|
||||
<< indent << " max = " << stats[1].first()
|
||||
<< "s (processor " << stats[1].second() << ')' << nl;
|
||||
printTimingStats(Info(), "total ", stats);
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
}
|
||||
|
||||
// all-all
|
||||
{
|
||||
const statData& stats = times[1];
|
||||
double avg = stats[2].first()/Pstream::nProcs();
|
||||
const int index = int(profilingPstream::ALL_TO_ALL);
|
||||
|
||||
Info<< indent << "all-all : avg = " << avg << 's' << nl
|
||||
<< indent << " min = " << stats[0].first()
|
||||
<< "s (processor " << stats[0].second() << ')' << nl
|
||||
<< indent << " max = " << stats[1].first()
|
||||
<< "s (processor " << stats[1].second() << ')' << nl;
|
||||
extractValues(extractedTimes, index, allTimes);
|
||||
extractValues(extractedCounts, index, allCounts);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "all-all ", stats);
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
// broadcast
|
||||
{
|
||||
const statData& stats = times[2];
|
||||
double avg = stats[2].first()/Pstream::nProcs();
|
||||
const int index = int(profilingPstream::BROADCAST);
|
||||
|
||||
Info<< indent << "other : avg = " << avg << 's' << nl
|
||||
<< indent << " min = " << stats[0].first()
|
||||
<< "s (processor " << stats[0].second() << ')' << nl
|
||||
<< indent << " max = " << stats[1].first()
|
||||
<< "s (processor " << stats[1].second() << ')' << nl;
|
||||
extractValues(extractedTimes, index, allTimes);
|
||||
extractValues(extractedCounts, index, allCounts);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "broadcast ", stats);
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
// probe
|
||||
{
|
||||
const int index = int(profilingPstream::PROBE);
|
||||
|
||||
extractValues(extractedTimes, index, allTimes);
|
||||
extractValues(extractedCounts, index, allCounts);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "probe ", stats);
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
// Reduce/gather/scatter times
|
||||
{
|
||||
// const int index = int(profilingPstream::REDUCE);
|
||||
|
||||
extractValues
|
||||
(
|
||||
extractedTimes,
|
||||
allTimes,
|
||||
[=](const double values[])
|
||||
{
|
||||
return
|
||||
(
|
||||
values[profilingPstream::REDUCE]
|
||||
+ values[profilingPstream::GATHER]
|
||||
+ values[profilingPstream::SCATTER]
|
||||
);
|
||||
}
|
||||
);
|
||||
extractValues
|
||||
(
|
||||
extractedCounts,
|
||||
allCounts,
|
||||
[=](const uint64_t values[])
|
||||
{
|
||||
return
|
||||
(
|
||||
values[profilingPstream::REDUCE]
|
||||
+ values[profilingPstream::GATHER]
|
||||
+ values[profilingPstream::SCATTER]
|
||||
);
|
||||
}
|
||||
);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "reduce ", stats);
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
// request
|
||||
{
|
||||
const int index = int(profilingPstream::REQUEST);
|
||||
|
||||
extractValues(extractedTimes, index, allTimes);
|
||||
extractValues(extractedCounts, index, allCounts);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "request ", stats);
|
||||
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
// wait
|
||||
{
|
||||
const int index = int(profilingPstream::WAIT);
|
||||
|
||||
extractValues(extractedTimes, index, allTimes);
|
||||
extractValues(extractedCounts, index, allCounts);
|
||||
stats = calcStats(extractedTimes);
|
||||
|
||||
printTimingStats(Info(), "wait ", stats);
|
||||
|
||||
if (detailLevel_ > 0) printTimingDetail(extractedTimes);
|
||||
if (detailLevel_ > 1) printTimingDetail(extractedCounts);
|
||||
}
|
||||
|
||||
Info<< decrIndent;
|
||||
|
@ -5,7 +5,7 @@
|
||||
\\ / A nd | www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2019-2022 OpenCFD Ltd.
|
||||
Copyright (C) 2019-2023 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -43,6 +43,7 @@ Usage
|
||||
// Report stats on exit only (instead of every time step)
|
||||
executeControl onEnd;
|
||||
writeControl none;
|
||||
detail 0;
|
||||
}
|
||||
\endverbatim
|
||||
|
||||
@ -60,10 +61,6 @@ SourceFiles
|
||||
|
||||
namespace Foam
|
||||
{
|
||||
|
||||
// Forward Declarations
|
||||
class Time;
|
||||
|
||||
namespace functionObjects
|
||||
{
|
||||
|
||||
@ -75,7 +72,15 @@ class parProfiling
|
||||
:
|
||||
public functionObject
|
||||
{
|
||||
// Private Member Functions
|
||||
// Private Data
|
||||
|
||||
//- The level of detail
|
||||
// 0: summary, 1: per-proc times, 2: per-proc times/counts
|
||||
int detailLevel_;
|
||||
|
||||
public:
|
||||
|
||||
// Generated Methods
|
||||
|
||||
//- No copy construct
|
||||
parProfiling(const parProfiling&) = delete;
|
||||
@ -84,8 +89,6 @@ class parProfiling
|
||||
void operator=(const parProfiling&) = delete;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
//- Runtime type information
|
||||
TypeName("parProfiling");
|
||||
|
||||
@ -116,7 +119,7 @@ public:
|
||||
//- Do nothing
|
||||
virtual bool write();
|
||||
|
||||
//- Report
|
||||
//- Disables profilingPstream
|
||||
virtual bool end();
|
||||
};
|
||||
|
||||
|
@ -54,6 +54,7 @@ functions
|
||||
// #include "sampleCellCentres"
|
||||
#include "isentropicTotalPressure"
|
||||
#include "wallHeatFlux"
|
||||
#include "profiling"
|
||||
}
|
||||
|
||||
|
||||
|
@ -0,0 +1,10 @@
|
||||
// -*- C++ -*-
|
||||
|
||||
// Dictionary entry for the parProfiling function object
profiling
|
||||
{
|
||||
// Shared settings (type, libs, detail, executeControl, writeControl)
// come from the common parallel.cfg
#includeEtc "caseDicts/profiling/parallel.cfg"
|
||||
// Detail level 2 = per-processor times and counts
// (parallel.cfg by itself specifies 'detail 0')
detail 2;
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
@ -2,14 +2,8 @@
|
||||
|
||||
profiling
|
||||
{
|
||||
type parProfiling;
|
||||
|
||||
libs (utilityFunctionObjects);
|
||||
|
||||
// Report stats on exit only (instead of every time step)
|
||||
executeControl onEnd;
|
||||
writeControl none;
|
||||
#includeEtc "caseDicts/profiling/parallel.cfg"
|
||||
detail 1;
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
||||
|
@ -2,14 +2,8 @@
|
||||
|
||||
profiling
|
||||
{
|
||||
type parProfiling;
|
||||
|
||||
libs (utilityFunctionObjects);
|
||||
|
||||
// Report stats on exit only (instead of every time step)
|
||||
executeControl onEnd;
|
||||
writeControl none;
|
||||
#includeEtc "caseDicts/profiling/parallel.cfg"
|
||||
detail 1;
|
||||
}
|
||||
|
||||
|
||||
// ************************************************************************* //
|
||||
|
Loading…
Reference in New Issue
Block a user