From 3852f7c5b764bfb9b0373810c35055548344ed01 Mon Sep 17 00:00:00 2001 From: Mark Olesen Date: Fri, 7 Feb 2025 15:50:47 +0100 Subject: [PATCH] ENH: add node-based globalIndex::gather (topology-aware handling) --- .../globalIndex/globalIndexTemplates.C | 85 ++++++++++++++++--- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C index 8c7d83f7ef..6c3c46397d 100644 --- a/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C +++ b/src/OpenFOAM/parallel/globalIndex/globalIndexTemplates.C @@ -136,15 +136,15 @@ Foam::List Foam::globalIndex::listGatherValues // low-level: no parRun guard? const int masterProci = (procIDs.empty() ? 0 : procIDs[0]); - List allValues; - // if (!UPstream::is_parallel(comm)) // { - // allValues.resize(1); + // List allValues(1); // allValues[0] = localValue; // return allValues; // } + List allValues; + // Cannot use non-blocking for non-contiguous data if constexpr (!is_contiguous_v) { @@ -241,9 +241,9 @@ void Foam::globalIndex::gather for (label i = 1; i < procIDs.size(); ++i) { - SubList procSlot(allFld, off[i+1]-off[i], off[i]); + SubList slot(allFld, off[i+1]-off[i], off[i]); - if (procSlot.empty()) + if (slot.empty()) { // Nothing to do } @@ -253,14 +253,14 @@ void Foam::globalIndex::gather ( commsType, procIDs[i], - procSlot, + slot, tag, comm ); } else { - IPstream::recv(procSlot, procIDs[i], tag, comm); + IPstream::recv(slot, procIDs[i], tag, comm); } } @@ -361,15 +361,15 @@ void Foam::globalIndex::gather for (label i = 1; i < procIDs.size(); ++i) { - SubList procSlot(allFld, off[i+1]-off[i], off[i]); + SubList slot(allFld, off[i+1]-off[i], off[i]); - if (procSlot.empty()) + if (slot.empty()) { // Nothing to do } else { - IPstream::recv(procSlot, procIDs[i], tag, comm); + IPstream::recv(slot, procIDs[i], tag, comm); } } @@ -498,6 +498,7 @@ void Foam::globalIndex::gather allData.clear(); // zero-size on non-master } + if (!UPstream::usingNodeComms(comm)) { globalIndex::gather ( @@ -510,6 +511,62 @@ void Foam::globalIndex::gather commsType ); } + else + { + // Using node-based hierarchy + + // Using comm-world and have node communication active + const auto interNodeComm = UPstream::commInterNode(); + const auto localNodeComm = UPstream::commLocalNode(); + + // Stage 0 : The inter-node/intra-node offsets + labelList interNodeOffsets; + labelList localNodeOffsets; + this->splitNodeOffsets(interNodeOffsets, localNodeOffsets, comm); + + // The first node re-uses the output (allData) when collecting + // content. All other nodes require temporary node-local storage. + + List tmpNodeData; + if (UPstream::is_subrank(interNodeComm)) + { + tmpNodeData.resize(localNodeOffsets.back()); + } + + List& nodeData = + ( + UPstream::master(interNodeComm) ? allData : tmpNodeData + ); + + // Stage 1 : Gather data within the node + { + globalIndex::gather + ( + localNodeOffsets, // (master only) + localNodeComm, + UPstream::allProcs(localNodeComm), + sendData, + nodeData, // node-local dest (or the allData parameter) + tag, + commsType + ); + } + + // Stage 2 : Gather data between nodes + if (UPstream::is_rank(interNodeComm)) + { + globalIndex::gather + ( + interNodeOffsets, // (master only) + interNodeComm, + UPstream::allProcs(interNodeComm), + nodeData, + allData, + tag, + commsType + ); + } + } } @@ -1029,9 +1086,9 @@ void Foam::globalIndex::scatter { for (label i = 1; i < procIDs.size(); ++i) { - const SubList procSlot(allFld, off[i+1]-off[i], off[i]); + const SubList slot(allFld, off[i+1]-off[i], off[i]); - if (procSlot.empty()) + if (slot.empty()) { // Nothing to do } @@ -1041,14 +1098,14 @@ void Foam::globalIndex::scatter ( commsType, procIDs[i], - procSlot, + slot, tag, comm ); } else { - OPstream::send(procSlot, commsType, procIDs[i], tag, comm); + OPstream::send(slot, commsType, procIDs[i], tag, comm); } }