ENH: wordRes::uniq() removes all duplicates

- Previously only duplicate literals were removed; now any duplicate
  entry is removed, whether it is a literal or a regular expression.

- Replace the previous wordHashSet implementation with a linear search.
  The lists are normally fairly small and mostly contain unique entries
  anyway, so this reduces the overall overhead.
This commit is contained in:
Mark Olesen 2019-02-14 11:03:04 +01:00 committed by Andrew Heather
parent 60c314150c
commit 95a33c2f68
3 changed files with 80 additions and 22 deletions

View File

@ -2,7 +2,7 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2017-2018 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2017-2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
| Copyright (C) 2011-2016 OpenFOAM Foundation
@ -36,6 +36,7 @@ Description
#include "keyType.H"
#include "wordRes.H"
#include "predicates.H"
#include "Random.H"
using namespace Foam;
@ -139,6 +140,34 @@ int main(int argc, char *argv[])
Info<< "string match: " << string("x.*")("xyz") << nl;
Info<< "string match: " << string("x.*")(keyre) << nl;
// Test uniq
{
Random rnd;
const label last = wres1.size()-1;
for (label i = 0; i < 8; ++i)
{
// Make a copy
wordRe wre(wres1[rnd.position<label>(0,last)]);
// Append
wres1.append(wre);
}
// Add some entropy
Foam::shuffle(wres1);
Info<< nl
<< "Test uniq on " << wres1
<< " == " << wordRes::uniq(wres1) << nl;
// Inplace
wres1.uniq();
Info<< nl << "Inplace: " << wres1 << nl;
}
Info<< nl;
wordRe(s1, wordRe::DETECT).info(Info) << nl;
wordRe(s2).info(Info) << nl;
wordRe(s2, wordRe::DETECT).info(Info) << nl;

View File

@ -2,7 +2,7 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2016-2018 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2016-2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
License
@ -24,26 +24,41 @@ License
\*---------------------------------------------------------------------------*/
#include "wordRes.H"
#include "HashSet.H"
// * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
// Return a new wordRes holding the entries of 'input' with duplicates
// dropped; the surviving entries are written in their original order.
//
// NOTE(review): this listing appears to interleave the removed lines
// (wordHashSet / range-for over 'select') with the added lines
// (duplicate flags / forAll over 'i') of a diff whose +/- markers were
// lost, so it is not compilable as shown - confirm against wordRes.C.
Foam::wordRes Foam::wordRes::uniq(const UList<wordRe>& input)
{
// Sized for the worst case (no duplicates); trimmed to 'count' at the end
wordRes output(input.size());
wordHashSet uniqWord;
// Use linear List search instead of HashSet, since the lists are
// normally fairly small and mostly just have unique entries
// anyhow. This reduces the overall overhead.
List<bool> duplicate(input.size(), false); // Track duplicates
label count = 0;
for (const wordRe& select : input)
forAll(input, i)
{
if (select.isPattern() || uniqWord.insert(select))
const wordRe& val = input[i];
// Search for an equal entry starting at i+1.
// Presumably find() returns -1 when there is no match, which is
// why the result is tested with (next > i) - TODO confirm.
const label next = input.find(val, i+1);
if (next > i)
{
output[count] = select;
// Only the nearest later duplicate is flagged here. Further repeats
// are flagged in turn when the loop reaches index 'next', since the
// search runs for every index, flagged or not.
duplicate[next] = true; // Duplicate
}
// An entry that was never flagged is a first occurrence: keep it
if (!duplicate[i])
{
output[count] = val;
++count;
}
}
output.resize(count);
return output;
}
@ -52,23 +67,37 @@ Foam::wordRes Foam::wordRes::uniq(const UList<wordRe>& input)
// Inplace variant: drop duplicate entries from this list, writing the
// surviving entries back in their original order and resizing to fit.
//
// NOTE(review): this listing appears to interleave the removed lines
// (wordHashSet / range-for over 'select' / manual ++i / resize(count))
// with the added lines (copy into 'input', duplicate flags, forAll) of
// a diff whose +/- markers were lost, so it is not compilable as shown -
// confirm against wordRes.C.
void Foam::wordRes::uniq()
{
wordHashSet uniqWord;
// Presumably a copy of the current content (copy-initialised from
// *this): entries are then moved out of 'input' back into *this, so the
// search below never reads from the list being overwritten.
List<wordRe> input = *this;
label i = 0, count = 0;
for (wordRe& select : *this)
wordRes& output = *this;
// Use linear List search instead of HashSet, since the lists are
// normally fairly small and mostly just have unique entries
// anyhow. This reduces the overall overhead.
List<bool> duplicate(input.size(), false); // Track duplicates
label count = 0;
forAll(input, i)
{
if (select.isPattern() || uniqWord.insert(select))
wordRe& val = input[i];
// Search for an equal entry starting at i+1, before 'val' is moved from.
// Presumably find() returns -1 when there is no match, which is
// why the result is tested with (next > i) - TODO confirm.
const label next = input.find(val, i+1);
if (next > i)
{
if (count != i)
{
(*this)[count] = std::move(select);
}
// Only the nearest later duplicate is flagged here. Further repeats
// are flagged in turn when the loop reaches index 'next'.
duplicate[next] = true; // Duplicate
}
// An entry that was never flagged is a first occurrence: keep it.
// Moving from 'val' is safe for the search, because later searches
// start beyond index i and never revisit this slot of 'input'.
if (!duplicate[i])
{
output[count] = std::move(val);
++count;
}
++i;
}
resize(count);
output.resize(count);
}

View File

@ -2,7 +2,7 @@
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | Copyright (C) 2016-2018 OpenCFD Ltd.
\\ / A nd | Copyright (C) 2016-2019 OpenCFD Ltd.
\\/ M anipulation |
-------------------------------------------------------------------------------
License
@ -101,8 +101,8 @@ public:
//- Return a null wordRes - a reference to the NullObject
inline static const wordRes& null();
//- Return a wordRes with duplicate words filtered out.
// No filtering attempted on regular expressions.
//- Return a wordRes with duplicate entries filtered out.
// No distinction made between literals or regular expressions.
static wordRes uniq(const UList<wordRe>& input);
@ -118,8 +118,8 @@ public:
// Member Functions
//- Filter out duplicate words (inplace).
// No filtering attempted on regular expressions.
//- Filter out duplicate entries (inplace).
// No distinction made between literals or regular expressions.
void uniq();
//- Smart match as literal or regex, stopping on the first match.