FIX: redistributePar problems with lagrangian

- the fileHandler changes included setting cacheLevel(0) to avoid
  blocking with redistributePar. However, this meant if clouds
  were not uniformly present on all ranks the fileHandler would follow
  different code paths and lead to blocking.

  Now switch to distributed mode for the lagrangian operations within
  redistributePar based on the cacheLevel information.

FIX: avoid triggering a false processor check in argList

- when redistributing to fewer ranks
This commit is contained in:
Mark Olesen 2023-12-20 14:59:36 +01:00
parent 88be9ef5c6
commit de133af526
6 changed files with 133 additions and 32 deletions

View File

@ -29,6 +29,7 @@ License
#include "ListOps.H"
#include "parLagrangianDistributor.H"
#include "passivePositionParticleCloud.H"
#include "fileOperation.H"
// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
@ -76,9 +77,10 @@ void Foam::parLagrangianDistributor::findClouds
(
cloud::prefix,
mesh.time().timeName(),
mesh,
mesh.thisDb(),
IOobjectOption::MUST_READ,
IOobjectOption::NO_WRITE
IOobjectOption::NO_WRITE,
IOobjectOption::NO_REGISTER
);
// Using the fileHandler:
@ -110,9 +112,10 @@ void Foam::parLagrangianDistributor::findClouds
Pstream::combineReduce(cloudNames, ListOps::uniqueEqOp<word>());
Foam::sort(cloudNames); // Consistent order
const label nClouds = cloudNames.size();
// See which of the global cloudNames I have
haveClouds.resize_nocopy(cloudNames.size());
haveClouds.resize_nocopy(nClouds);
haveClouds = false;
for (const fileName& localCloudName : localCloudDirs)
@ -125,17 +128,21 @@ void Foam::parLagrangianDistributor::findClouds
}
// Collect fields per cloud
objectNames.resize(cloudNames.size());
objectNames.resize_nocopy(nClouds);
for (const fileName& localCloudName : localCloudDirs)
for (label cloudi = 0; cloudi < nClouds; ++cloudi)
{
objectNames[cloudi].clear();
if (!haveClouds[cloudi]) continue;
// Do local scan for valid cloud objects
const bool oldParRun = UPstream::parRun(false);
IOobjectList localObjs
(
mesh,
mesh.time().timeName(),
cloud::prefix/localCloudName
cloud::prefix/cloudNames[cloudi]
);
UPstream::parRun(oldParRun);
@ -152,9 +159,6 @@ void Foam::parLagrangianDistributor::findClouds
if (isCloud)
{
// Has coordinates/positions - so must be a valid cloud
const label cloudi = cloudNames.find(localCloudName);
objectNames[cloudi] = localObjs.sortedNames();
}
}
@ -333,9 +337,24 @@ Foam::parLagrangianDistributor::distributeLagrangianPositions
const word& cloudName
) const
{
// Load cloud and send particle
// Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
// Load cloud
passivePositionParticleCloud lpi(srcMesh_, cloudName, false);
// Restore distributed flag
handler.distributed(oldDistributed);
// Distribute particles to other ranks
return distributeLagrangianPositions(lpi);
}

View File

@ -34,6 +34,7 @@ Description
#include "parLagrangianDistributor.H"
#include "unmappedPassivePositionParticleCloud.H"
#include "fileOperation.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
@ -59,6 +60,17 @@ readLagrangian
(void)mesh.tetBasePtIs();
}
// Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
// Setup clouds
forAll(cloudNames, i)
{
@ -88,6 +100,9 @@ readLagrangian
);
}
// Restore distributed flag
handler.distributed(oldDistributed);
return clouds;
}
@ -164,6 +179,16 @@ void reconstructLagrangian
}
const auto& distributor = *distributorPtr;
// Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
forAll(cloudNames, cloudi)
{
const word& cloudName = cloudNames[cloudi];
@ -171,6 +196,12 @@ void reconstructLagrangian
Info<< "Reconstructing lagrangian fields for cloud "
<< cloudName << nl << endl;
autoPtr<mapDistributeBase> lagrangianMapPtr =
distributor.distributeLagrangianPositions
(
cloudName
);
IOobjectList cloudObjs
(
mesh,
@ -178,12 +209,6 @@ void reconstructLagrangian
cloud::prefix/cloudName
);
autoPtr<mapDistributeBase> lagrangianMapPtr =
distributor.distributeLagrangianPositions
(
cloudName
);
distributor.distributeAllFields
(
lagrangianMapPtr(),
@ -193,6 +218,9 @@ void reconstructLagrangian
selectedFields
);
}
// Restore distributed flag
handler.distributed(oldDistributed);
}

View File

@ -1440,24 +1440,25 @@ int main(int argc, char *argv[])
else
{
// Directory does not exist. If this happens on master -> decompose mode
if (UPstream::master() && !reconstruct)
if (UPstream::master() && !reconstruct && !decompose)
{
decompose = true;
InfoOrPout
<< "No processor directories; switching on decompose mode"
<< nl << endl;
<< nl << endl;
}
}
// If master changed to decompose mode make sure all nodes know about it
Pstream::broadcast(decompose);
if (decompose)
{
// The UPstream::nProcs is either the source or destination procs
fileOperation::nProcsFilter(UPstream::nProcs());
InfoOrPout<< "Switching to exact matching for "
<< fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs())
<< " processor directories"
<< nl << endl;
// The UPstream::nProcs is either the source or destination procs
fileOperation::nProcsFilter(UPstream::nProcs());
InfoOrPout
<< "Switching to exact matching for "
<< fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs())
<< " processor directories"
<< nl << endl;
}

View File

@ -1711,21 +1711,21 @@ void Foam::argList::parse
}
if (nProcDirs != Pstream::nProcs())
if (nProcDirs < UPstream::nProcs())
{
FatalError
<< "number of processor directories = "
<< nProcDirs
<< " is not equal to the number of processors = "
<< Pstream::nProcs()
<< UPstream::nProcs()
<< exit(FatalError);
}
}
// Distribute the master's argument list (unaltered)
for (const int subproci : Pstream::subProcs())
for (const int proci : UPstream::subProcs())
{
OPstream toProc(Pstream::commsTypes::scheduled, subproci);
OPstream toProc(UPstream::commsTypes::scheduled, proci);
toProc
<< args_ << options_

View File

@ -3,6 +3,10 @@ cd "${0%/*}" || exit # Run from this directory
. ${WM_PROJECT_DIR:?}/bin/tools/RunFunctions # Tutorial run functions
#------------------------------------------------------------------------------
fileHandler="-fileHandler collated"
unset fileHandler
## decompDict5="-decomposeParDict system/decomposeParDict.5"
# Create mesh
runApplication blockMesh
@ -18,12 +22,29 @@ runApplication createBaffles -overwrite
runApplication $(getApplication)
#- RedistributePar to do decomposition
runParallel redistributePar -decompose -cellDist
runParallel redistributePar -decompose -cellDist $fileHandler
#- Continue running for a bit more
runParallel -s parallel $(getApplication)
runParallel -s parallel $(getApplication) $fileHandler
#- Reconstruct all times
runParallel -s 1 redistributePar -reconstruct
if :
then
#- Reconstruct all times
runParallel -s reconstruct \
redistributePar -reconstruct $fileHandler
else
# Not yet entirely working...
#- Send to more ranks
runParallel -s more-ranks $decompDict5 redistributePar $fileHandler
#- Continue running for a bit more
runParallel -s more-ranks $decompDict5 $(getApplication) $fileHandler
#- Reconstruct all times
runParallel -s reconstruct $decompDict5 \
redistributePar -reconstruct $fileHandler -latestTime
fi
#------------------------------------------------------------------------------

View File

@ -0,0 +1,32 @@
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: v2312 |
| \\ / A nd | Website: www.openfoam.com |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class dictionary;
object decomposeParDict;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
numberOfSubdomains 5;
method scotch;
constraints
{
//- Keep owner and neighbour on same processor for faces in zones:
faces
{
type preserveFaceZones;
zones (cycLeft cycRight);
}
}
// ************************************************************************* //