ENH: misc Pstream adjustments
- additional startup guard for inter-node/local-node queries (UPstream)
- impose a linear communication tree for inter-node/local-node communicators. This previously defaulted to a basic tree, but it is more consistent to have flat addressing for these types of connections.
- demand-driven UPstream::interNode_offsets() for walking inter-node ranges instead of creating the offsets manually in various places.
- (style): List<int> instead of labelList for the internal commsStruct, since the communication structures are tied to MPI sizes and not to the OpenFOAM label sizes.
- reduce the number of intermediate buffer allocations within gatherList, scatterList.
This commit is contained in:
parent
4720b61313
commit
eb4345ed44
@ -59,13 +59,13 @@ int main(int argc, char *argv[])
|
||||
|
||||
label nProcs = UPstream::nProcs(UPstream::worldComm);
|
||||
|
||||
List<int> interNodeProcs_fake;
|
||||
DynamicList<int> fake_interNode_offsets;
|
||||
|
||||
if (UPstream::parRun())
|
||||
{
|
||||
if (args.found("numProcs"))
|
||||
{
|
||||
InfoErr<< "ignoring -np option in parallel" << nl;
|
||||
InfoErr<< "ignoring -numProcs option in parallel" << nl;
|
||||
}
|
||||
if (args.found("cores"))
|
||||
{
|
||||
@ -78,25 +78,40 @@ int main(int argc, char *argv[])
|
||||
nProcs = args.getOrDefault<label>("numProcs", 16);
|
||||
label nCores = args.getOrDefault<label>("cores", 4);
|
||||
|
||||
auto& interNode_offsets = fake_interNode_offsets;
|
||||
|
||||
if (nCores > 1 && nCores < nProcs)
|
||||
{
|
||||
const label numNodes
|
||||
= (nProcs/nCores) + ((nProcs % nCores) ? 1 : 0);
|
||||
// Build the inter-node offsets
|
||||
interNode_offsets.reserve((nProcs/nCores) + 4);
|
||||
interNode_offsets.push_back(0);
|
||||
|
||||
interNodeProcs_fake.resize(numNodes);
|
||||
|
||||
for (label nodei = 0; nodei < numNodes; ++nodei)
|
||||
for
|
||||
(
|
||||
int count = interNode_offsets.back() + nCores;
|
||||
count < nProcs;
|
||||
count += nCores
|
||||
)
|
||||
{
|
||||
interNodeProcs_fake[nodei] = nodei * nCores;
|
||||
interNode_offsets.push_back(count);
|
||||
}
|
||||
|
||||
interNode_offsets.push_back(nProcs);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Some fallback
|
||||
interNode_offsets.reserve(2);
|
||||
interNode_offsets.push_back(0);
|
||||
interNode_offsets.push_back(nProcs);
|
||||
}
|
||||
}
|
||||
|
||||
const List<int>& interNodeProcs =
|
||||
const List<int>& interNodeOffsets =
|
||||
(
|
||||
UPstream::parRun()
|
||||
? UPstream::procID(UPstream::commInterNode())
|
||||
: interNodeProcs_fake
|
||||
? UPstream::interNode_offsets()
|
||||
: fake_interNode_offsets
|
||||
);
|
||||
|
||||
|
||||
@ -111,79 +126,31 @@ int main(int argc, char *argv[])
|
||||
// Prefer left-to-right layout for large graphs
|
||||
os << indent << "rankdir=LR" << nl;
|
||||
|
||||
int pos = 0;
|
||||
const label numNodes = interNodeOffsets.size()-1;
|
||||
|
||||
// First level are the inter-node connections
|
||||
const label parent = 0;
|
||||
for (const auto proci : interNodeProcs)
|
||||
{
|
||||
if (parent == proci) continue;
|
||||
os << indent << 0 << " -- " << token::LBRACE;
|
||||
|
||||
if (pos)
|
||||
for (label nodei = 1; nodei < numNodes; ++nodei)
|
||||
{
|
||||
os << " ";
|
||||
os << ' ' << interNodeOffsets[nodei];
|
||||
}
|
||||
else
|
||||
{
|
||||
os << indent;
|
||||
}
|
||||
os << parent << " -- " << proci;
|
||||
|
||||
if (++pos >= 4) // Max 4 items per line
|
||||
{
|
||||
pos = 0;
|
||||
os << nl;
|
||||
}
|
||||
os << token::SPACE << token::RBRACE
|
||||
<< " // inter-node: " << flatOutput(interNodeOffsets)
|
||||
<< nl;
|
||||
}
|
||||
|
||||
if (pos)
|
||||
// Next level are the local-node connections
|
||||
for (label nodei = 0; nodei < numNodes; ++nodei)
|
||||
{
|
||||
pos = 0;
|
||||
os << nl;
|
||||
}
|
||||
const auto firstProc = interNodeOffsets[nodei];
|
||||
const auto lastProc = interNodeOffsets[nodei+1];
|
||||
|
||||
// Next level are within the nodes
|
||||
for (label nodei = 0; nodei < interNodeProcs.size(); ++nodei)
|
||||
{
|
||||
pos = 0;
|
||||
|
||||
label firstProc = interNodeProcs[nodei];
|
||||
const label lastProc =
|
||||
(
|
||||
(nodei+1 < interNodeProcs.size())
|
||||
? interNodeProcs[nodei+1]
|
||||
: nProcs
|
||||
);
|
||||
|
||||
os << indent << "// inter-node " << nodei
|
||||
<< " [" << firstProc
|
||||
<< ".." << lastProc-1 << "]" << nl;
|
||||
|
||||
for (label proci = firstProc; proci < lastProc; ++proci)
|
||||
{
|
||||
if (firstProc == proci) continue;
|
||||
|
||||
if (pos)
|
||||
{
|
||||
os << " ";
|
||||
}
|
||||
else
|
||||
{
|
||||
os << indent;
|
||||
}
|
||||
os << firstProc << " -- " << proci;
|
||||
|
||||
if (++pos >= 4) // Max 4 items per line
|
||||
{
|
||||
pos = 0;
|
||||
os << nl;
|
||||
}
|
||||
}
|
||||
if (pos)
|
||||
{
|
||||
pos = 0;
|
||||
os << nl;
|
||||
}
|
||||
os << indent << firstProc << " -- " << token::DQUOTE
|
||||
<< (firstProc+1) << ".." << (lastProc-1)
|
||||
<< token::DQUOTE << nl;
|
||||
}
|
||||
|
||||
os.endBlock();
|
||||
|
@ -60,7 +60,7 @@ void Foam::Pstream::combineGather
|
||||
const auto& myComm = comms[UPstream::myProcNo(comm)];
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const label belowID : myComm.below())
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
@ -172,7 +172,7 @@ void Foam::Pstream::listCombineGather
|
||||
const auto& myComm = comms[UPstream::myProcNo(comm)];
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const label belowID : myComm.below())
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
@ -288,7 +288,7 @@ void Foam::Pstream::mapCombineGather
|
||||
const auto& myComm = comms[UPstream::myProcNo(comm)];
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const label belowID : myComm.below())
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
// Map/HashTable: non-contiguous
|
||||
|
||||
|
@ -44,18 +44,18 @@ void Foam::Pstream::gather
|
||||
T& value,
|
||||
const BinaryOp& bop,
|
||||
const int tag,
|
||||
const label comm
|
||||
const label communicator
|
||||
)
|
||||
{
|
||||
if (UPstream::is_parallel(comm))
|
||||
if (UPstream::is_parallel(communicator))
|
||||
{
|
||||
// Communication order
|
||||
const auto& comms = UPstream::whichCommunication(comm);
|
||||
const auto& comms = UPstream::whichCommunication(communicator);
|
||||
// if (comms.empty()) return; // extra safety?
|
||||
const auto& myComm = comms[UPstream::myProcNo(comm)];
|
||||
const auto& myComm = comms[UPstream::myProcNo(communicator)];
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const label belowID : myComm.below())
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
T received;
|
||||
|
||||
@ -68,12 +68,12 @@ void Foam::Pstream::gather
|
||||
reinterpret_cast<char*>(&received),
|
||||
sizeof(T),
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
IPstream::recv(received, belowID, tag, comm);
|
||||
IPstream::recv(received, belowID, tag, communicator);
|
||||
}
|
||||
|
||||
value = bop(value, received);
|
||||
@ -91,12 +91,12 @@ void Foam::Pstream::gather
|
||||
reinterpret_cast<const char*>(&value),
|
||||
sizeof(T),
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
OPstream::send(value, myComm.above(), tag, comm);
|
||||
OPstream::send(value, myComm.above(), tag, communicator);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -32,13 +32,13 @@ Description
|
||||
values[UPstream::myProcNo(comm)].
|
||||
Note: after gather every processor only knows its own data and that of the
|
||||
processors below it. Only the 'master' of the communication schedule holds
|
||||
a fully filled List. Use scatter to distribute the data.
|
||||
a fully filled List. Use broadcast to distribute the data.
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
#include "contiguous.H"
|
||||
#include "IPstream.H"
|
||||
#include "OPstream.H"
|
||||
#include "contiguous.H"
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
@ -48,13 +48,13 @@ void Foam::Pstream::gatherList
|
||||
const UPstream::commsStructList& comms,
|
||||
UList<T>& values,
|
||||
const int tag,
|
||||
const label comm
|
||||
const label communicator
|
||||
)
|
||||
{
|
||||
if (!comms.empty() && UPstream::is_parallel(comm))
|
||||
if (!comms.empty() && UPstream::is_parallel(communicator))
|
||||
{
|
||||
const label myProci = UPstream::myProcNo(comm);
|
||||
const label numProc = UPstream::nProcs(comm);
|
||||
const label myProci = UPstream::myProcNo(communicator);
|
||||
const label numProc = UPstream::nProcs(communicator);
|
||||
|
||||
if (values.size() < numProc)
|
||||
{
|
||||
@ -67,29 +67,71 @@ void Foam::Pstream::gatherList
|
||||
// My communication order
|
||||
const auto& myComm = comms[myProci];
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const label belowID : myComm.below())
|
||||
|
||||
// Local buffer for send/recv of contiguous
|
||||
[[maybe_unused]] DynamicList<T> buffer;
|
||||
|
||||
// Presize buffer
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
const labelList& belowLeaves = comms[belowID].allBelow();
|
||||
label maxCount = 0;
|
||||
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
auto count = comms[belowID].allBelow().size();
|
||||
maxCount = Foam::max(maxCount, count);
|
||||
}
|
||||
|
||||
if (myComm.above() >= 0)
|
||||
{
|
||||
auto count = myComm.allBelow().size();
|
||||
maxCount = Foam::max(maxCount, count);
|
||||
}
|
||||
|
||||
buffer.reserve(maxCount + 1);
|
||||
}
|
||||
|
||||
|
||||
// Receive from my downstairs neighbours
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
const auto& leaves = comms[belowID].allBelow();
|
||||
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
List<T> received(belowLeaves.size() + 1);
|
||||
|
||||
UIPstream::read
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
belowID,
|
||||
received,
|
||||
tag,
|
||||
comm
|
||||
);
|
||||
|
||||
values[belowID] = received[0];
|
||||
|
||||
forAll(belowLeaves, leafI)
|
||||
if (leaves.empty())
|
||||
{
|
||||
values[belowLeaves[leafI]] = received[leafI + 1];
|
||||
// Receive directly into destination
|
||||
UIPstream::read
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
belowID,
|
||||
values[belowID],
|
||||
tag,
|
||||
communicator
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Receive via intermediate buffer
|
||||
buffer.resize_nocopy(leaves.size() + 1);
|
||||
|
||||
UIPstream::read
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
belowID,
|
||||
buffer,
|
||||
tag,
|
||||
communicator
|
||||
);
|
||||
|
||||
label recvIdx(0);
|
||||
values[belowID] = buffer[recvIdx++];
|
||||
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
values[leafID] = buffer[recvIdx++];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -100,7 +142,7 @@ void Foam::Pstream::gatherList
|
||||
belowID,
|
||||
0, // bufsize
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
fromBelow >> values[belowID];
|
||||
|
||||
@ -112,7 +154,7 @@ void Foam::Pstream::gatherList
|
||||
}
|
||||
|
||||
// Receive from all other processors below belowID
|
||||
for (const label leafID : belowLeaves)
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
fromBelow >> values[leafID];
|
||||
|
||||
@ -131,7 +173,7 @@ void Foam::Pstream::gatherList
|
||||
// - all belowLeaves next
|
||||
if (myComm.above() >= 0)
|
||||
{
|
||||
const labelList& belowLeaves = myComm.allBelow();
|
||||
const auto& leaves = myComm.allBelow();
|
||||
|
||||
if (debug & 2)
|
||||
{
|
||||
@ -142,22 +184,40 @@ void Foam::Pstream::gatherList
|
||||
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
List<T> sending(belowLeaves.size() + 1);
|
||||
sending[0] = values[myProci];
|
||||
|
||||
forAll(belowLeaves, leafI)
|
||||
if (leaves.empty())
|
||||
{
|
||||
sending[leafI + 1] = values[belowLeaves[leafI]];
|
||||
// Send directly
|
||||
UOPstream::write
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
myComm.above(),
|
||||
values[myProci],
|
||||
tag,
|
||||
communicator
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Send via intermediate buffer
|
||||
buffer.resize_nocopy(leaves.size() + 1);
|
||||
|
||||
UOPstream::write
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
myComm.above(),
|
||||
sending,
|
||||
tag,
|
||||
comm
|
||||
);
|
||||
label sendIdx(0);
|
||||
buffer[sendIdx++] = values[myProci];
|
||||
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
buffer[sendIdx++] = values[leafID];
|
||||
}
|
||||
|
||||
UOPstream::write
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
myComm.above(),
|
||||
buffer,
|
||||
tag,
|
||||
communicator
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -167,11 +227,11 @@ void Foam::Pstream::gatherList
|
||||
myComm.above(),
|
||||
0, // bufsize
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
toAbove << values[myProci];
|
||||
|
||||
for (const label leafID : belowLeaves)
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
if (debug & 2)
|
||||
{
|
||||
@ -193,17 +253,17 @@ void Foam::Pstream::scatterList
|
||||
const UPstream::commsStructList& comms,
|
||||
UList<T>& values,
|
||||
const int tag,
|
||||
const label comm
|
||||
const label communicator
|
||||
)
|
||||
{
|
||||
// Apart from the additional size check, the only difference
|
||||
// between scatterList() and using broadcast(List<T>&) or a regular
|
||||
// scatter(List<T>&) is that processor-local data is skipped.
|
||||
|
||||
if (!comms.empty() && UPstream::is_parallel(comm))
|
||||
if (!comms.empty() && UPstream::is_parallel(communicator))
|
||||
{
|
||||
const label myProci = UPstream::myProcNo(comm);
|
||||
const label numProc = UPstream::nProcs(comm);
|
||||
const label myProci = UPstream::myProcNo(communicator);
|
||||
const label numProc = UPstream::nProcs(communicator);
|
||||
|
||||
if (values.size() < numProc)
|
||||
{
|
||||
@ -216,27 +276,53 @@ void Foam::Pstream::scatterList
|
||||
// My communication order
|
||||
const auto& myComm = comms[myProci];
|
||||
|
||||
|
||||
// Local buffer for send/recv of contiguous
|
||||
[[maybe_unused]] DynamicList<T> buffer;
|
||||
|
||||
// Presize buffer
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
label maxCount = 0;
|
||||
|
||||
if (myComm.above() >= 0)
|
||||
{
|
||||
auto count = myComm.allNotBelow().size();
|
||||
maxCount = Foam::max(maxCount, count);
|
||||
}
|
||||
|
||||
for (const auto belowID : myComm.below())
|
||||
{
|
||||
auto count = comms[belowID].allNotBelow().size();
|
||||
maxCount = Foam::max(maxCount, count);
|
||||
}
|
||||
|
||||
buffer.reserve(maxCount);
|
||||
}
|
||||
|
||||
|
||||
// Receive from up
|
||||
if (myComm.above() >= 0)
|
||||
{
|
||||
const labelList& notBelowLeaves = myComm.allNotBelow();
|
||||
const auto& leaves = myComm.allNotBelow();
|
||||
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
List<T> received(notBelowLeaves.size());
|
||||
buffer.resize_nocopy(leaves.size());
|
||||
|
||||
UIPstream::read
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
myComm.above(),
|
||||
received,
|
||||
buffer,
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
|
||||
forAll(notBelowLeaves, leafI)
|
||||
label recvIdx(0);
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
values[notBelowLeaves[leafI]] = received[leafI];
|
||||
values[leafID] = buffer[recvIdx++];
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -247,10 +333,10 @@ void Foam::Pstream::scatterList
|
||||
myComm.above(),
|
||||
0, // bufsize
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
|
||||
for (const label leafID : notBelowLeaves)
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
fromAbove >> values[leafID];
|
||||
|
||||
@ -267,25 +353,26 @@ void Foam::Pstream::scatterList
|
||||
// Send to my downstairs neighbours
|
||||
forAllReverse(myComm.below(), belowI)
|
||||
{
|
||||
const label belowID = myComm.below()[belowI];
|
||||
const labelList& notBelowLeaves = comms[belowID].allNotBelow();
|
||||
const auto belowID = myComm.below()[belowI];
|
||||
const auto& leaves = comms[belowID].allNotBelow();
|
||||
|
||||
if constexpr (is_contiguous_v<T>)
|
||||
{
|
||||
List<T> sending(notBelowLeaves.size());
|
||||
buffer.resize_nocopy(leaves.size());
|
||||
|
||||
forAll(notBelowLeaves, leafI)
|
||||
label sendIdx(0);
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
sending[leafI] = values[notBelowLeaves[leafI]];
|
||||
buffer[sendIdx++] = values[leafID];
|
||||
}
|
||||
|
||||
UOPstream::write
|
||||
(
|
||||
UPstream::commsTypes::scheduled,
|
||||
belowID,
|
||||
sending,
|
||||
buffer,
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
}
|
||||
else
|
||||
@ -296,11 +383,11 @@ void Foam::Pstream::scatterList
|
||||
belowID,
|
||||
0, // bufsize
|
||||
tag,
|
||||
comm
|
||||
communicator
|
||||
);
|
||||
|
||||
// Send data destined for all other processors below belowID
|
||||
for (const label leafID : notBelowLeaves)
|
||||
for (const auto leafID : leaves)
|
||||
{
|
||||
toBelow << values[leafID];
|
||||
|
||||
|
@ -640,7 +640,7 @@ Foam::UPstream::treeCommunication(const label communicator)
|
||||
}
|
||||
|
||||
|
||||
void Foam::UPstream::printCommTree(const label communicator)
|
||||
void Foam::UPstream::printCommTree(int communicator)
|
||||
{
|
||||
const auto& comms = UPstream::whichCommunication(communicator);
|
||||
|
||||
@ -663,14 +663,60 @@ bool Foam::UPstream::usingNodeComms(const label communicator)
|
||||
(
|
||||
parRun_ && (constWorldComm_ == communicator)
|
||||
&& (nodeCommsControl_ > 0)
|
||||
|
||||
// More than one node and above defined threshold
|
||||
&& (numNodes_ > 1) && (numNodes_ >= nodeCommsMin_)
|
||||
// Some processes do share nodes
|
||||
&& (numNodes_ < procIDs_[constWorldComm_].size())
|
||||
|
||||
// Extra paranoid (guard against calling during startup)
|
||||
&& (commInterNode_ > constWorldComm_)
|
||||
&& (commLocalNode_ > constWorldComm_)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// Return the processor offsets for the inter-node communicator.
//
// The list is the content of procIDs_[commInterNode_] with one extra
// trailing entry holding UPstream::nProcs(constWorldComm_).
// It is built on the first call that passes the startup guard and is then
// kept in a function-local static, so later calls return the same list.
//
// Returns List<int>::null() when commInterNode_ is not greater than
// constWorldComm_ or is not a valid index into procIDs_.
const Foam::List<int>& Foam::UPstream::interNode_offsets()
|
||||
{
|
||||
// Demand-driven storage: remains unset until the guard below is passed
static std::unique_ptr<List<int>> singleton;
|
||||
|
||||
if (!singleton)
|
||||
{
|
||||
// Extra paranoid (guard against calling during startup)
// Nothing is cached on this path, so a later call evaluates the guard again
|
||||
if
|
||||
(
|
||||
(commInterNode_ <= constWorldComm_)
|
||||
|| (commInterNode_ >= procIDs_.size())
|
||||
)
|
||||
{
|
||||
return List<int>::null();
|
||||
}
|
||||
|
||||
singleton = std::make_unique<List<int>>();
|
||||
auto& offsets = *singleton;
|
||||
|
||||
const auto& procs = procIDs_[commInterNode_];
|
||||
|
||||
// The procIDs_ are already the offsets, but missing the end offset
// If procs is empty, the cached list is left empty
|
||||
if (!procs.empty())
|
||||
{
|
||||
const auto count = procs.size();
|
||||
|
||||
// One extra slot for the end offset
offsets.resize(count+1);
|
||||
std::copy_n
|
||||
(
|
||||
procs.begin(),
|
||||
count,
|
||||
offsets.begin()
|
||||
);
|
||||
// End offset: number of processes in the constWorldComm_ communicator
offsets[count] = UPstream::nProcs(constWorldComm_);
|
||||
}
|
||||
}
|
||||
|
||||
return *singleton;
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
|
||||
|
||||
bool Foam::UPstream::parRun_(false);
|
||||
|
@ -108,18 +108,18 @@ public:
|
||||
// Private Data
|
||||
|
||||
//- The procID of the processor \em directly above
|
||||
label above_;
|
||||
int above_;
|
||||
|
||||
//- The procIDs of processors \em directly below
|
||||
labelList below_;
|
||||
List<int> below_;
|
||||
|
||||
//- The procIDs of all processors below myProcNo,
|
||||
//- not just directly below
|
||||
labelList allBelow_;
|
||||
List<int> allBelow_;
|
||||
|
||||
//- The procIDs of all processors not below myProcNo
|
||||
// (inverse of allBelow_ without myProcNo)
|
||||
labelList allNotBelow_;
|
||||
//- (inverse of allBelow_ without myProcNo)
|
||||
List<int> allNotBelow_;
|
||||
|
||||
|
||||
public:
|
||||
@ -132,20 +132,20 @@ public:
|
||||
//- Move construct from components
|
||||
commsStruct
|
||||
(
|
||||
const label above,
|
||||
labelList&& below,
|
||||
labelList&& allBelow,
|
||||
labelList&& allNotBelow
|
||||
const int above,
|
||||
List<int>&& below,
|
||||
List<int>&& allBelow,
|
||||
List<int>&& allNotBelow
|
||||
);
|
||||
|
||||
//- Copy construct from below, allBelow components
|
||||
commsStruct
|
||||
(
|
||||
const label numProcs,
|
||||
const label myProcID,
|
||||
const label above,
|
||||
const labelUList& below,
|
||||
const labelUList& allBelow
|
||||
const int numProcs,
|
||||
const int myProcID,
|
||||
const int above,
|
||||
const UList<int>& below,
|
||||
const UList<int>& allBelow
|
||||
);
|
||||
|
||||
|
||||
@ -153,26 +153,26 @@ public:
|
||||
|
||||
// Access
|
||||
|
||||
//- The number of processors addressed by the structure
|
||||
label nProcs() const noexcept;
|
||||
|
||||
//- The procID of the processor \em directly above
|
||||
label above() const noexcept { return above_; }
|
||||
int above() const noexcept { return above_; }
|
||||
|
||||
//- The procIDs of the processors \em directly below
|
||||
const labelList& below() const noexcept { return below_; }
|
||||
const List<int>& below() const noexcept { return below_; }
|
||||
|
||||
//- The procIDs of all processors below
|
||||
//- The procIDs of \em all processors below
|
||||
//- (so not just directly below)
|
||||
const labelList& allBelow() const noexcept { return allBelow_; }
|
||||
const List<int>& allBelow() const noexcept { return allBelow_; }
|
||||
|
||||
//- The procIDs of all processors not below myProcNo.
|
||||
//- The inverse set of allBelow without myProcNo.
|
||||
const labelList& allNotBelow() const noexcept
|
||||
const List<int>& allNotBelow() const noexcept
|
||||
{
|
||||
return allNotBelow_;
|
||||
}
|
||||
|
||||
//- The number of processors addressed by the structure
|
||||
int nProcs() const noexcept;
|
||||
|
||||
|
||||
// Edit
|
||||
|
||||
@ -183,9 +183,9 @@ public:
|
||||
//- possibly with communicator-specific adjustments
|
||||
void reset
|
||||
(
|
||||
const label procID,
|
||||
const label numProcs,
|
||||
const label comm = -1
|
||||
const int myProci,
|
||||
const int numProcs,
|
||||
const int communicator
|
||||
);
|
||||
|
||||
|
||||
@ -203,7 +203,7 @@ public:
|
||||
// Private Data
|
||||
|
||||
//- The communicator index
|
||||
label comm_;
|
||||
int comm_;
|
||||
|
||||
//- The communication tree
|
||||
List<commsStruct> tree_;
|
||||
@ -216,7 +216,7 @@ public:
|
||||
commsStructList() noexcept : comm_(-1) {}
|
||||
|
||||
//- Construct empty with given communicator
|
||||
commsStructList(label comm) noexcept : comm_(comm) {}
|
||||
explicit commsStructList(int comm) noexcept : comm_(comm) {}
|
||||
|
||||
|
||||
// Static Functions
|
||||
@ -230,8 +230,8 @@ public:
|
||||
//- True if communicator is non-negative (ie, was assigned)
|
||||
bool good() const noexcept { return (comm_ >= 0); }
|
||||
|
||||
//- The communicator label
|
||||
label comm() const noexcept { return comm_; }
|
||||
//- The communicator internal index
|
||||
int comm() const noexcept { return comm_; }
|
||||
|
||||
//- Clear the list
|
||||
void clear() { return tree_.clear(); }
|
||||
@ -242,20 +242,23 @@ public:
|
||||
//- The number of entries
|
||||
label size() const noexcept { return tree_.size(); }
|
||||
|
||||
//- Reset communicator index and clear demand-driven entries
|
||||
void init(const label comm);
|
||||
//- Reset communicator index, fill tree with empty entries
|
||||
void init(int communicator);
|
||||
|
||||
//- Reset communicator index, clear tree entries
|
||||
void reset(int communicator);
|
||||
|
||||
//- Get existing or create (demand-driven) entry
|
||||
const UPstream::commsStruct& get(const label proci) const;
|
||||
const UPstream::commsStruct& get(int proci) const;
|
||||
|
||||
//- Get existing or create (demand-driven) entry
|
||||
const UPstream::commsStruct& operator[](const label proci) const
|
||||
const UPstream::commsStruct& operator[](int proci) const
|
||||
{
|
||||
return get(proci);
|
||||
}
|
||||
|
||||
//- Print un-directed graph in graphviz dot format
|
||||
void printGraph(Ostream& os, label proci = 0) const;
|
||||
void printGraph(Ostream& os, int proci = 0) const;
|
||||
};
|
||||
|
||||
|
||||
@ -1074,6 +1077,10 @@ public:
|
||||
return rangeType(1, static_cast<int>(nProcs(communicator)-1));
|
||||
}
|
||||
|
||||
//- Processor offsets corresponding to the inter-node communicator
|
||||
static const List<int>& interNode_offsets();
|
||||
|
||||
|
||||
//- Communication schedule for linear all-to-master (proc 0)
|
||||
static const commsStructList& linearCommunication
|
||||
(
|
||||
@ -1105,7 +1112,7 @@ public:
|
||||
(
|
||||
np <= 1
|
||||
? commsStructList::null()
|
||||
: (np <= 2 || np < nProcsSimpleSum)
|
||||
: (np <= 2 || np < UPstream::nProcsSimpleSum)
|
||||
? linearCommunication(communicator)
|
||||
: treeCommunication(communicator)
|
||||
);
|
||||
|
@ -28,6 +28,9 @@ License
|
||||
|
||||
#include "UPstream.H"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
|
||||
|
||||
namespace Foam
|
||||
@ -38,9 +41,9 @@ static void printGraph_impl
|
||||
(
|
||||
Ostream& os,
|
||||
const UPstream::commsStructList& comms,
|
||||
const label proci,
|
||||
label depth,
|
||||
const label maxDepth = 1024
|
||||
const int proci,
|
||||
int depth,
|
||||
const int maxDepth = 1024
|
||||
)
|
||||
{
|
||||
if (proci >= comms.size())
|
||||
@ -59,41 +62,80 @@ static void printGraph_impl
|
||||
|
||||
// Prefer left-to-right layout for large graphs
|
||||
os << indent << "rankdir=LR" << nl;
|
||||
}
|
||||
|
||||
if (below.empty())
|
||||
|
||||
// Output the immediate neighbours below
|
||||
|
||||
if (below.empty())
|
||||
{
|
||||
if (proci == 0)
|
||||
{
|
||||
// A graph with a single-node (eg, self-comm)
|
||||
os << indent << proci << nl;
|
||||
}
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
|
||||
for (const auto nbrProci : below)
|
||||
else
|
||||
{
|
||||
if (pos)
|
||||
{
|
||||
os << " ";
|
||||
}
|
||||
else
|
||||
{
|
||||
os << indent;
|
||||
}
|
||||
os << proci << " -- " << nbrProci;
|
||||
os << indent << proci << " -- " << token::BEGIN_BLOCK;
|
||||
|
||||
if (++pos >= 4) // Max 4 items per line
|
||||
// Accumulate into ranges whenever possible
|
||||
IntRange<int> range;
|
||||
|
||||
// Print accumulated range and reset
|
||||
auto emit_range = [&]()
|
||||
{
|
||||
pos = 0;
|
||||
os << nl;
|
||||
if (!range.empty())
|
||||
{
|
||||
os << ' ';
|
||||
if (range.min() < range.max())
|
||||
{
|
||||
os << '"' << range.min() << ".." << range.max() << '"';
|
||||
}
|
||||
else
|
||||
{
|
||||
os << range.min();
|
||||
}
|
||||
range.reset();
|
||||
}
|
||||
};
|
||||
|
||||
for (const auto nbrProci : below)
|
||||
{
|
||||
const bool terminal = comms[nbrProci].below().empty();
|
||||
|
||||
if
|
||||
(
|
||||
terminal
|
||||
&& (!range.empty() && (range.max()+1 == nbrProci))
|
||||
)
|
||||
{
|
||||
// Accumulate
|
||||
++range;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Emit accumulated range
|
||||
emit_range();
|
||||
|
||||
if (terminal)
|
||||
{
|
||||
range.reset(nbrProci, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
os << token::SPACE << nbrProci;
|
||||
}
|
||||
}
|
||||
|
||||
// Emit accumulated range
|
||||
emit_range();
|
||||
|
||||
os << token::SPACE << token::END_BLOCK << nl;
|
||||
}
|
||||
|
||||
if (pos)
|
||||
{
|
||||
os << nl;
|
||||
}
|
||||
|
||||
// Limit the maximum depth
|
||||
// Recurse into below neighbours, but limit the maximum depth
|
||||
++depth;
|
||||
if (depth >= maxDepth && (proci != 0))
|
||||
{
|
||||
@ -109,7 +151,6 @@ static void printGraph_impl
|
||||
if (proci == 0)
|
||||
{
|
||||
os.endBlock();
|
||||
|
||||
os << "// end graph" << nl;
|
||||
}
|
||||
}
|
||||
@ -150,46 +191,46 @@ static void printGraph_impl
|
||||
namespace Foam
|
||||
{
|
||||
|
||||
static label simpleTree
|
||||
static int simpleTree
|
||||
(
|
||||
const label procID,
|
||||
const label numProcs,
|
||||
const int myProci,
|
||||
const int numProcs,
|
||||
|
||||
DynamicList<label>& below,
|
||||
DynamicList<label>& allBelow
|
||||
DynamicList<int>& below,
|
||||
DynamicList<int>& allBelow
|
||||
)
|
||||
{
|
||||
label above(-1);
|
||||
int above(-1);
|
||||
|
||||
for (label mod = 2, step = 1; step < numProcs; step = mod)
|
||||
for (int mod = 2, step = 1; step < numProcs; step = mod)
|
||||
{
|
||||
mod = step * 2;
|
||||
|
||||
if (procID % mod)
|
||||
if (myProci % mod)
|
||||
{
|
||||
// The rank above
|
||||
above = procID - (procID % mod);
|
||||
above = myProci - (myProci % mod);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
for
|
||||
(
|
||||
label j = procID + step;
|
||||
j < numProcs && j < procID + mod;
|
||||
j += step
|
||||
int i = myProci + step;
|
||||
i < numProcs && i < myProci + mod;
|
||||
i += step
|
||||
)
|
||||
{
|
||||
below.push_back(j);
|
||||
below.push_back(i);
|
||||
}
|
||||
for
|
||||
(
|
||||
label j = procID + step;
|
||||
j < numProcs && j < procID + mod;
|
||||
j++
|
||||
int i = myProci + step;
|
||||
i < numProcs && i < myProci + mod;
|
||||
++i
|
||||
)
|
||||
{
|
||||
allBelow.push_back(j);
|
||||
allBelow.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -204,10 +245,10 @@ static label simpleTree
|
||||
|
||||
Foam::UPstream::commsStruct::commsStruct
|
||||
(
|
||||
const label above,
|
||||
labelList&& below,
|
||||
labelList&& allBelow,
|
||||
labelList&& allNotBelow
|
||||
const int above,
|
||||
List<int>&& below,
|
||||
List<int>&& allBelow,
|
||||
List<int>&& allNotBelow
|
||||
)
|
||||
:
|
||||
above_(above),
|
||||
@ -219,11 +260,11 @@ Foam::UPstream::commsStruct::commsStruct
|
||||
|
||||
Foam::UPstream::commsStruct::commsStruct
|
||||
(
|
||||
const label numProcs,
|
||||
const label myProcID,
|
||||
const label above,
|
||||
const labelUList& below,
|
||||
const labelUList& allBelow
|
||||
const int numProcs,
|
||||
const int myProcID,
|
||||
const int above,
|
||||
const UList<int>& below,
|
||||
const UList<int>& allBelow
|
||||
)
|
||||
:
|
||||
above_(above),
|
||||
@ -237,14 +278,14 @@ Foam::UPstream::commsStruct::commsStruct
|
||||
isNotBelow[myProcID] = false;
|
||||
|
||||
// Exclude allBelow
|
||||
for (const label proci : allBelow)
|
||||
for (const auto proci : allBelow)
|
||||
{
|
||||
isNotBelow[proci] = false;
|
||||
}
|
||||
|
||||
// Compacting to obtain allNotBelow_
|
||||
label nNotBelow = 0;
|
||||
forAll(isNotBelow, proci)
|
||||
int nNotBelow = 0;
|
||||
for (int proci = 0; proci < numProcs; ++proci)
|
||||
{
|
||||
if (isNotBelow[proci])
|
||||
{
|
||||
@ -266,7 +307,7 @@ Foam::UPstream::commsStruct::commsStruct
|
||||
void Foam::UPstream::commsStructList::printGraph
|
||||
(
|
||||
Ostream& os,
|
||||
const label proci
|
||||
const int proci
|
||||
) const
|
||||
{
|
||||
// Print graph - starting at depth 0
|
||||
@ -282,9 +323,9 @@ void Foam::UPstream::commsStructList::printGraph
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
Foam::label Foam::UPstream::commsStruct::nProcs() const noexcept
|
||||
int Foam::UPstream::commsStruct::nProcs() const noexcept
|
||||
{
|
||||
return (1 + allBelow_.size() + allNotBelow_.size());
|
||||
return (1 + int(allBelow_.size() + allNotBelow_.size()));
|
||||
}
|
||||
|
||||
|
||||
@ -299,46 +340,65 @@ void Foam::UPstream::commsStruct::reset()
|
||||
|
||||
void Foam::UPstream::commsStruct::reset
|
||||
(
|
||||
const label procID,
|
||||
const label numProcs,
|
||||
[[maybe_unused]] const label comm
|
||||
const int myProci,
|
||||
const int numProcs,
|
||||
const int communicator
|
||||
)
|
||||
{
|
||||
reset();
|
||||
|
||||
if (numProcs <= 2 || numProcs < UPstream::nProcsSimpleSum)
|
||||
// Linear (flat) communication pattern
|
||||
if
|
||||
(
|
||||
// Trivially small domains
|
||||
(numProcs <= 2 || numProcs < UPstream::nProcsSimpleSum)
|
||||
|
||||
// local-node: assume that the local communication is low-latency
|
||||
|| (
|
||||
UPstream::commLocalNode() == communicator
|
||||
&& UPstream::commLocalNode() > UPstream::commConstWorld()
|
||||
)
|
||||
// inter-node: presumably relatively few nodes and/or
|
||||
// higher latency with larger messages being sent
|
||||
|| (
|
||||
UPstream::commInterNode() == communicator
|
||||
&& UPstream::commInterNode() > UPstream::commConstWorld()
|
||||
)
|
||||
)
|
||||
{
|
||||
// Linear communication pattern
|
||||
label above(-1);
|
||||
labelList below;
|
||||
int above(-1);
|
||||
List<int> below;
|
||||
|
||||
if (procID == 0)
|
||||
if (myProci == 0)
|
||||
{
|
||||
below = identity(numProcs-1, 1);
|
||||
below.resize(numProcs-1);
|
||||
std::iota(below.begin(), below.end(), 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
above = 0;
|
||||
}
|
||||
|
||||
*this = UPstream::commsStruct(numProcs, procID, above, below, below);
|
||||
*this = UPstream::commsStruct(numProcs, myProci, above, below, below);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Simple tree communication pattern
|
||||
DynamicList<label> below;
|
||||
DynamicList<label> allBelow;
|
||||
|
||||
label above = simpleTree
|
||||
DynamicList<int> below;
|
||||
DynamicList<int> allBelow;
|
||||
|
||||
// Simple tree communication pattern
|
||||
int above = simpleTree
|
||||
(
|
||||
procID,
|
||||
myProci,
|
||||
numProcs,
|
||||
below,
|
||||
allBelow
|
||||
);
|
||||
|
||||
*this = UPstream::commsStruct(numProcs, procID, above, below, allBelow);
|
||||
*this = UPstream::commsStruct(numProcs, myProci, above, below, allBelow);
|
||||
}
|
||||
|
||||
|
||||
@ -360,19 +420,36 @@ Foam::UPstream::commsStructList::null()
|
||||
|
||||
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
|
||||
|
||||
void Foam::UPstream::commsStructList::init(const label comm)
|
||||
void Foam::UPstream::commsStructList::init(int communicator)
|
||||
{
|
||||
comm_ = comm;
|
||||
comm_ = communicator;
|
||||
tree_.clear();
|
||||
if (comm_ >= 0)
|
||||
{
|
||||
tree_.resize(UPstream::nProcs(comm_));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Foam::UPstream::commsStructList::reset(int communicator)
|
||||
{
|
||||
comm_ = communicator;
|
||||
tree_.clear();
|
||||
tree_.resize(UPstream::nProcs(comm));
|
||||
}
|
||||
|
||||
|
||||
const Foam::UPstream::commsStruct&
|
||||
Foam::UPstream::commsStructList::get(const label proci) const
|
||||
Foam::UPstream::commsStructList::get(int proci) const
|
||||
{
|
||||
const auto numProcs = UPstream::nProcs(comm_);
|
||||
|
||||
// Only if reset(comm) instead of init(comm) was used
|
||||
if (tree_.size() < numProcs)
|
||||
{
|
||||
const_cast<List<commsStruct>&>(tree_).resize(numProcs);
|
||||
}
|
||||
|
||||
const UPstream::commsStruct& entry = tree_[proci];
|
||||
const auto numProcs = tree_.size();
|
||||
|
||||
if (entry.nProcs() != numProcs)
|
||||
{
|
||||
@ -391,10 +468,8 @@ bool Foam::UPstream::commsStruct::operator==(const commsStruct& comm) const
|
||||
{
|
||||
return
|
||||
(
|
||||
(above_ == comm.above())
|
||||
&& (below_ == comm.below())
|
||||
// && (allBelow_ == comm.allBelow())
|
||||
// && (allNotBelow_ == comm.allNotBelow())
|
||||
(above() == comm.above())
|
||||
&& (below() == comm.below())
|
||||
);
|
||||
}
|
||||
|
||||
@ -409,10 +484,10 @@ bool Foam::UPstream::commsStruct::operator!=(const commsStruct& comm) const
|
||||
|
||||
Foam::Ostream& Foam::operator<<(Ostream& os, const UPstream::commsStruct& comm)
|
||||
{
|
||||
os << comm.above() << nl << token::SPACE << token::SPACE;
|
||||
comm.below().writeList(os) << nl << token::SPACE << token::SPACE;
|
||||
comm.allBelow().writeList(os) << nl << token::SPACE << token::SPACE;
|
||||
comm.allNotBelow().writeList(os);
|
||||
os << comm.above() << nl;
|
||||
os << " "; comm.below().writeList(os) << nl;
|
||||
os << " "; comm.allBelow().writeList(os) << nl;
|
||||
os << " "; comm.allNotBelow().writeList(os);
|
||||
|
||||
os.check(FUNCTION_NAME);
|
||||
return os;
|
||||
|
@ -2110,21 +2110,33 @@ void Foam::argList::parse
|
||||
Info<< " (" << UPstream::nProcs() << " ranks, "
|
||||
<< UPstream::numNodes() << " nodes)" << nl;
|
||||
|
||||
Info<< " floatTransfer : "
|
||||
<< Switch::name(UPstream::floatTransfer) << nl
|
||||
<< " maxCommsSize : "
|
||||
<< UPstream::maxCommsSize << nl
|
||||
<< " nProcsSimpleSum : "
|
||||
<< UPstream::nProcsSimpleSum << nl
|
||||
<< " nonBlockingExchange: "
|
||||
<< UPstream::nProcsNonblockingExchange
|
||||
<< " (tuning: " << UPstream::tuning_NBX_ << ')' << nl
|
||||
<< " exchange algorithm : "
|
||||
<< PstreamBuffers::algorithm << nl
|
||||
<< " commsType : "
|
||||
<< UPstream::commsTypeNames[UPstream::defaultCommsType] << nl
|
||||
<< " polling iterations : "
|
||||
<< UPstream::nPollProcInterfaces << nl;
|
||||
if (UPstream::floatTransfer)
|
||||
{
|
||||
Info<< " floatTransfer : enabled" << nl;
|
||||
}
|
||||
if (UPstream::maxCommsSize)
|
||||
{
|
||||
Info<< " maxCommsSize : "
|
||||
<< UPstream::maxCommsSize << nl;
|
||||
}
|
||||
if (UPstream::nProcsSimpleSum > 2)
|
||||
{
|
||||
Info<< " nProcsSimpleSum : "
|
||||
<< UPstream::nProcsSimpleSum << nl;
|
||||
}
|
||||
{
|
||||
const auto& commsType =
|
||||
UPstream::commsTypeNames[UPstream::defaultCommsType];
|
||||
|
||||
Info<< " nonBlockingExchange: "
|
||||
<< UPstream::nProcsNonblockingExchange
|
||||
<< " (tuning: " << UPstream::tuning_NBX_ << ')' << nl
|
||||
<< " exchange algorithm : "
|
||||
<< PstreamBuffers::algorithm << nl
|
||||
<< " commsType : " << commsType << nl
|
||||
<< " polling iterations : "
|
||||
<< UPstream::nPollProcInterfaces << nl;
|
||||
}
|
||||
|
||||
if (UPstream::allWorlds().size() > 1)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user