ENH: improve robustness of raw reading, file size checks

- use ignore instead of seekg/tellg to swallow input (more robust)

- check for bad gcount() values

- wrap Foam::fileSize() compressed/uncompressed handling into IFstream.

- improve handling of compressed files in masterUncollatedFileOperation.
  Previously read into a string via stream iterators.
  Now read chunk-wise into a List of char for fewer reallocations.
This commit is contained in:
Mark Olesen 2023-08-29 12:27:36 +02:00 committed by Andrew Heather
parent a341d09afc
commit 459aaad0f9
16 changed files with 318 additions and 144 deletions

View File

@ -5,7 +5,7 @@
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2020 OpenCFD Ltd.
Copyright (C) 2020-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -33,6 +33,7 @@ Description
#include "argList.H"
#include "Fstream.H"
#include "OSspecific.H"
#include "etcFiles.H"
using namespace Foam;
@ -44,11 +45,14 @@ int main(int argc, char *argv[])
{
argList::noBanner();
argList::noParallel();
argList::noParallel();
argList::addOption("ignore", "file", "Test readRaw with ignore");
#include "setRootCase.H"
// Test with etc/controlDict (mandatory, from distribution)
if (!args.found("ignore"))
{
const fileName inputFile
(
@ -97,6 +101,43 @@ int main(int argc, char *argv[])
}
}
fileName testFile;
if (args.readIfPresent("ignore", testFile))
{
if (testFile.has_ext("gz"))
{
testFile.remove_ext();
Info<< "stripping extraneous .gz ending" << endl;
}
IFstream is(testFile);
auto& stdStream = is.stdStream();
List<char> buffer(1000);
Info<< "Test readRaw with: " << is.name()
<< " compressed:" << int(is.compression())
<< " file-size:" << is.fileSize() << nl;
for (int iter = 0; is.good() && iter < 1000; ++iter)
{
Info<< "iter:" << iter;
if (iter % 2)
{
Info<< " [read] ";
is.readRaw(buffer.data(), buffer.size());
}
else
{
Info<< " [ignore]";
is.readRaw(nullptr, buffer.size() / 2);
}
Info<< " : " << stdStream.gcount() << endl;
}
}
Info<< "\nEnd\n" << endl;
return 0;
}

View File

@ -130,7 +130,7 @@ static List<char> slurpFile
// 66% compression = 3 iterations
// ...
const off_t inputSize = Foam::fileSize(pathname + ".gz");
const auto inputSize = Foam::fileSize(pathname + ".gz");
const uint64_t chunkSize =
(
@ -202,7 +202,7 @@ static List<char> slurpFile
}
else
{
const off_t inputSize = Foam::fileSize(pathname);
const auto inputSize = Foam::fileSize(pathname);
if (inputSize >= 0)
{

View File

@ -27,7 +27,7 @@ License
\*---------------------------------------------------------------------------*/
#include "IFstream.H"
#include "OSspecific.H"
#include "OSspecific.H" // For isFile(), fileSize()
// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
@ -91,6 +91,45 @@ Foam::IFstream::IFstream
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
//- Return the on-disk size of the file behind this stream, or -1 on error.
//  For a stream opened as COMPRESSED the size of "<name>.gz" is queried,
//  otherwise the size of "<name>" itself.
//  A missing stream pointer, an empty stream name or a negative result
//  from Foam::fileSize() are all reported as -1.
std::streamsize Foam::IFstream::fileSize() const
{
    // Nothing to query without an underlying stream or a file name
    if (!ifstreamPointer::get() || this->name().empty())
    {
        return std::streamsize(-1);
    }

    const bool isCompressed =
    (
        IOstreamOption::COMPRESSED == ifstreamPointer::whichCompression()
    );

    // TBD: special handling for a wrapped icharstream.
    // A dynamic_cast of the stream pointer to (const Foam::icharstream*)
    // could return its capacity() instead of querying the file system.

    // Only one of the two names is ever queried
    const off_t nBytes =
    (
        isCompressed
      ? Foam::fileSize(this->name() + ".gz")
      : Foam::fileSize(this->name())
    );

    return (nBytes >= 0) ? std::streamsize(nBytes) : std::streamsize(-1);
}
std::istream& Foam::IFstream::stdStream()
{
std::istream* ptr = ifstreamPointer::get();

View File

@ -6,7 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2011 OpenFOAM Foundation
Copyright (C) 2017-2021 OpenCFD Ltd.
Copyright (C) 2017-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -94,6 +94,13 @@ public:
//- Read/write access to the name of the stream
using ISstream::name;
//- Return the size of the underlying file (-1 on error).
//- This corresponds to Foam::fileSize() but with extra handling of
//- compressed files.
// The return type is \c std::streamsize instead of \c off_t.
// \note Use sparingly since it involves a file stat()!
std::streamsize fileSize() const;
// STL stream

View File

@ -1051,20 +1051,7 @@ Foam::Istream& Foam::ISstream::readRaw(char* data, std::streamsize count)
}
else
{
// Forward seek
// - use absolute positioning (see C++ notes about std::ifstream)
is_.seekg(is_.tellg() + std::istream::pos_type(count));
// Not sure if this is needed (as per rewind)
// some documentation indicates that ifstream needs
// seekg with values from a tellg
//
// stdStream().rdbuf()->pubseekpos
// (
// count,
// std::ios_base::seekdir::cur,
// std::ios_base::in
// );
is_.ignore(count);
}
}
syncState();
@ -1102,8 +1089,11 @@ void Foam::ISstream::rewind()
stdStream().clear(); // Clear the iostate error state flags
setGood(); // Sync local copy of iostate
// pubseekpos() rather than seekg() so that it works with gzstream
stdStream().rdbuf()->pubseekpos(0, std::ios_base::in);
// NOTE: this form of rewind does not work with igzstream.
// However, igzstream is usually wrapped as IFstream which has its
// own dedicated rewind treatment for igzstream.
}

View File

@ -76,9 +76,10 @@ inline int Foam::ISstream::peek()
inline Foam::ISstream& Foam::ISstream::getLine(std::string& str, char delim)
{
std::getline(is_, str, delim);
std::streamsize count = is_.gcount();
syncState();
if (delim == '\n')
if (delim == '\n' && count > 0)
{
++lineNumber_;
}
@ -90,11 +91,10 @@ inline Foam::ISstream& Foam::ISstream::getLine(std::string& str, char delim)
inline std::streamsize Foam::ISstream::getLine(std::nullptr_t, char delim)
{
is_.ignore(std::numeric_limits<std::streamsize>::max(), delim);
std::streamsize count = is_.gcount();
syncState();
std::streamsize count = is_.gcount();
if (delim == '\n' && count)
if (delim == '\n' && count > 0)
{
++lineNumber_;
}

View File

@ -73,7 +73,11 @@ static void broadcastFile_single
if (UPstream::master(comm))
{
// Read (see newIFstream)
lengthAndMode.first() = Foam::fileSize(srcName);
auto fileLen = Foam::fileSize(srcName);
if (fileLen > 0)
{
lengthAndMode.first() = uint64_t(fileLen);
}
lengthAndMode.second() = Foam::mode(srcName);
srcStream.reset

View File

@ -83,6 +83,101 @@ namespace fileOperations
}
// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
namespace Foam
{
// Get file contents (compressed or uncompressed)
//
// Reads the content of the given stream into a character buffer using
// readRaw() and returns that buffer, sized to the number of bytes that
// were reported as read via gcount() on the underlying std stream.
//
// \param ifs  the input file stream to read from
// \return the characters read. Empty if the file size could not be
//     determined (uncompressed case) or if the read reported a bad count.
static DynamicList<char> slurpFile(IFstream& ifs)
{
DynamicList<char> buffer;
// The std stream is only used here to query gcount() after each readRaw()
auto& iss = ifs.stdStream();
// Size as reported by IFstream::fileSize(); the checks below treat
// negative values as 'unknown'
const auto inputSize = ifs.fileSize();
if (IOstreamOption::COMPRESSED == ifs.compression())
{
// For compressed files, no idea how large the result will be.
// So read chunk-wise.
// Using the compressed size for the chunk size:
// 50% compression = 2 iterations
// 66% compression = 3 iterations
// ...
// Chunk is 4096 bytes when the reported size is <= 1024 (this includes
// a negative 'unknown' size), otherwise twice the reported size.
const uint64_t chunkSize =
(
(inputSize <= 1024)
? uint64_t(4096)
: uint64_t(2*inputSize)
);
// Number of bytes accumulated so far == write offset for the next chunk
uint64_t beg = 0;
// NOTE(review): the iteration count is capped. If the cap is reached
// without a short read, the loop ends silently with the content read
// so far - confirm that this is acceptable for very large inputs.
for (int iter = 1; iter < 100000; ++iter)
{
// Manual resizing to use incremental vs doubling
// i.e. capacity grows by exactly one chunk per iteration, and the
// addressable size is set to the full capacity before reading into it.
buffer.setCapacity(label(iter * chunkSize));
buffer.resize(buffer.capacity());
ifs.readRaw(buffer.data() + beg, chunkSize);
const std::streamsize nread = iss.gcount();
// A negative count or the maximum streamsize value is taken as invalid
// (presumably sentinel values from the stream - TODO confirm)
if
(
nread < 0
|| nread == std::numeric_limits<std::streamsize>::max()
)
{
// Failed, but treat as normal 'done'
// Keep only what was successfully read in earlier iterations
buffer.resize(label(beg));
break;
}
else
{
beg += uint64_t(nread);
// A short read (fewer bytes than requested) is taken as end of input.
// NOTE(review): 'nread >= 0' is already implied by the branch above.
if (nread >= 0 && uint64_t(nread) < chunkSize)
{
// normalExit = true;
buffer.resize(label(beg));
break;
}
}
}
}
else
{
// Uncompressed: a single read of the reported file size.
// With an unknown (negative) size nothing is read and the returned
// buffer stays empty.
if (inputSize >= 0)
{
buffer.setCapacity(label(inputSize));
buffer.resize(buffer.capacity());
ifs.readRaw(buffer.data(), buffer.size_bytes());
const std::streamsize nread = iss.gcount();
// Same validity check on the reported count as in the compressed branch
if
(
nread < 0
|| nread == std::numeric_limits<std::streamsize>::max()
)
{
// Failed, but treat as normal 'done'
buffer.clear();
}
else
{
// Shrink to the count actually read, which may be less than requested
buffer.resize(label(nread)); // Safety
}
}
}
return buffer;
}
} // End namespace Foam
// * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
Foam::word
@ -447,57 +542,26 @@ void Foam::fileOperations::masterUncollatedFileOperation::readAndSend
if (debug)
{
Pout<< "masterUncollatedFileOperation::readAndSend :"
Info<< "masterUncollatedFileOperation::readAndSend :"
<< " compressed:" << bool(ifs.compression()) << " "
<< filePath << endl;
}
if (ifs.compression() == IOstreamOption::COMPRESSED)
{
// Could use Foam::fileSize, estimate uncompressed size (eg, 2x)
// and then string reserve followed by string assign...
// Uncompress and read file contents into a character buffer
const std::string buf
(
std::istreambuf_iterator<char>(ifs.stdStream()),
std::istreambuf_iterator<char>()
);
// Read file contents (compressed or uncompressed) into a character buffer
DynamicList<char> buf(slurpFile(ifs));
for (const label proci : recvProcs)
{
UOPstream os(proci, pBufs);
os.write(buf.data(), buf.length());
os.write(buf.cdata_bytes(), buf.size_bytes());
}
if (debug)
{
Pout<< "masterUncollatedFileOperation::readStream :"
Info<< "masterUncollatedFileOperation::readStream :"
<< " From " << filePath << " sent " << buf.size()
<< " bytes" << endl;
}
}
else
{
const off_t count(Foam::fileSize(filePath));
// Read file contents into a character buffer
List<char> buf(static_cast<label>(count));
ifs.stdStream().read(buf.data(), count);
for (const label proci : recvProcs)
{
UOPstream os(proci, pBufs);
os.write(buf.cdata(), count);
}
if (debug)
{
Pout<< "masterUncollatedFileOperation::readStream :"
<< " From " << filePath << " sent " << buf.size()
<< " bytes" << endl;
}
}
}

View File

@ -50,12 +50,10 @@ Foam::ensightReadFile::detectBinaryHeader(const fileName& pathname)
// Binary string is *exactly* 80 characters
string buf(size_t(80), '\0');
iss.read(&buf[0], 80);
const std::streamsize gcount = iss.gcount();
buf.erase(gcount <= 0 ? 0 : gcount); // Truncated?
if (!iss)
{
// Truncated?
buf.erase(iss.gcount());
}
// Could exit on truncated input, but no real advantage
// Truncate at the first embedded '\0'
const auto endp = buf.find('\0');
@ -119,15 +117,13 @@ Foam::Istream& Foam::ensightReadFile::read(string& value)
// Binary string is *exactly* 80 characters
value.resize(80, '\0');
iss.read(&value[0], 80);
const std::streamsize gcount = iss.gcount();
value.erase(gcount <= 0 ? 0 : gcount); // Truncated?
// Could exit on truncated input, but no real advantage
syncState();
if (!iss)
{
// Truncated - could also exit here, but no real advantage
value.erase(iss.gcount());
}
// Truncate at the first embedded '\0'
auto endp = value.find('\0');

View File

@ -112,8 +112,8 @@ public:
// Constructors
//- From input stream and the approximate number of vertices in the STL
inline STLAsciiParse(const label approxNpoints);
//- Construct with the estimated number of triangles in the STL
inline STLAsciiParse(const label nTrisEstimated);
// Member Functions

View File

@ -6,7 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2011-2016 OpenFOAM Foundation
Copyright (C) 2016-2018 OpenCFD Ltd.
Copyright (C) 2016-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -89,10 +89,10 @@ public:
// Constructors
//- From input stream and the approximate number of vertices in the STL
STLAsciiParseFlex(istream* is, const label approxNpoints)
//- From input stream, with the estimated number of triangles in the STL
STLAsciiParseFlex(istream* is, const label nTrisEstimated)
:
Detail::STLAsciiParse(approxNpoints),
Detail::STLAsciiParse(nTrisEstimated),
yySTLFlexLexer(is)
{}
@ -306,9 +306,8 @@ endsolid {space}("endsolid"|"ENDSOLID")({some_space}{word})*
%%
//
// Member Function
//
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
bool Foam::fileFormats::STLReader::readAsciiFlex
(
const fileName& filename
@ -322,8 +321,20 @@ bool Foam::fileFormats::STLReader::readAsciiFlex
<< exit(FatalError);
}
// Create with approx number of vertices in the STL (from file size)
STLAsciiParseFlex lexer(&(is.stdStream()), Foam::fileSize(filename)/400);
// Create with estimated number of triangles in the STL.
// 180 bytes / triangle. For simplicity, ignore compression
const auto fileLen = is.fileSize();
const label nTrisEstimated =
(
(fileLen > 0)
? max(label(100), label(fileLen/180))
: label(100)
);
STLAsciiParseFlex lexer(&(is.stdStream()), nTrisEstimated);
lexer.execute();
transfer(lexer);
@ -331,6 +342,5 @@ bool Foam::fileFormats::STLReader::readAsciiFlex
return true;
}
/* ------------------------------------------------------------------------ *\
------ End of STLReaderASCII.L
\* ------------------------------------------------------------------------ */
// ************************************************************************* //

View File

@ -131,15 +131,15 @@ inline void Foam::Detail::STLAsciiParse::endFacet()
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
inline Foam::Detail::STLAsciiParse::STLAsciiParse(const label approxNpoints)
inline Foam::Detail::STLAsciiParse::STLAsciiParse(const label nTrisEstimated)
:
sorted_(true),
groupId_(-1),
lineNum_(1),
nFacetPoints_(0),
nVertexCmpt_(0),
points_(approxNpoints),
facets_(approxNpoints/2)
points_(3*nTrisEstimated),
facets_(nTrisEstimated)
{}

View File

@ -5,7 +5,7 @@
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2018 OpenCFD Ltd.
Copyright (C) 2018-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -30,7 +30,6 @@ Description
#include "STLAsciiParse.H"
#include "STLReader.H"
#include "OSspecific.H"
#include "stringOps.H"
// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
@ -127,10 +126,10 @@ class STLAsciiParseManual
public:
//- From input stream and the approximate number of vertices in the STL
STLAsciiParseManual(const label approxNpoints)
//- Construct with the estimated number of triangles in the STL
STLAsciiParseManual(const label nTrisEstimated)
:
Detail::STLAsciiParse(approxNpoints)
Detail::STLAsciiParse(nTrisEstimated)
{}
//- Execute parser
@ -179,7 +178,7 @@ void Foam::Detail::STLAsciiParseManual::execute(std::istream& is)
is.read(data, buflen);
const std::streamsize gcount = is.gcount();
if (!gcount)
if (gcount <= 0)
{
// EOF
// If scanning for next "solid" this is a valid way to exit, but
@ -398,11 +397,8 @@ void Foam::Detail::STLAsciiParseManual::execute(std::istream& is)
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
//
// Member Function
//
bool Foam::fileFormats::STLReader::readAsciiManual
(
const fileName& filename
@ -416,8 +412,20 @@ bool Foam::fileFormats::STLReader::readAsciiManual
<< exit(FatalError);
}
// Create with the approximate number of vertices in the STL from file size
Detail::STLAsciiParseManual lexer(Foam::fileSize(filename)/400);
// Create with estimated number of triangles in the STL.
// 180 bytes / triangle. For simplicity, ignore compression
const auto fileLen = is.fileSize();
const label nTrisEstimated =
(
(fileLen > 0)
? max(label(100), label(fileLen/180))
: label(100)
);
Detail::STLAsciiParseManual lexer(nTrisEstimated);
lexer.execute(is.stdStream());
transfer(lexer);

View File

@ -7,7 +7,7 @@
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2018 OpenCFD Ltd.
Copyright (C) 2018-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -35,7 +35,6 @@ Description
#include "STLAsciiParse.H"
#include "STLReader.H"
#include "OSspecific.H"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wunused-const-variable"
@ -69,6 +68,7 @@ Description
// - Only look for initial 'facet '. Ignore 'normal ...'
// - Ignore name for 'endsolid'
//
// ------------------------------------------------------------------------- //
// Ragel machine definition
// Ragel variables (p, pe, eof, cs, top, stack, ts, te, act) defined later...
@ -121,10 +121,10 @@ class STLAsciiParseRagel
public:
//- From input stream and the approximate number of vertices in the STL
STLAsciiParseRagel(const label approxNpoints)
//- From input stream, with the estimated number of triangles in the STL
STLAsciiParseRagel(const label nTrisEstimated)
:
Detail::STLAsciiParse(approxNpoints)
Detail::STLAsciiParse(nTrisEstimated)
{}
//- Execute lexer
@ -184,7 +184,7 @@ void Foam::Detail::STLAsciiParseRagel::execute(std::istream& is)
is.read(data, buflen);
const std::streamsize gcount = is.gcount();
if (!gcount)
if (gcount <= 0)
{
break;
}
@ -2970,9 +2970,7 @@ void Foam::Detail::STLAsciiParseRagel::die
const char *pe
) const
{
auto error = FatalErrorInFunction;
error
FatalErrorInFunction
<< nl
<< "Parsing error at or near line " << lineNum_
<<", while parsing for " << what << nl
@ -2984,22 +2982,19 @@ void Foam::Detail::STLAsciiParseRagel::die
for (unsigned i=0; i < 80; ++i)
{
if (*parsing == '\n' || parsing == pe) break;
error << *parsing;
FatalError << *parsing;
++parsing;
}
}
error
FatalError
<< "'\n"
<< exit(FatalError);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
//
// Member Function
//
bool Foam::fileFormats::STLReader::readAsciiRagel
(
const fileName& filename
@ -3013,8 +3008,20 @@ bool Foam::fileFormats::STLReader::readAsciiRagel
<< exit(FatalError);
}
// Create with approx number of vertices in the STL (from file size)
Detail::STLAsciiParseRagel lexer(Foam::fileSize(filename)/400);
// Create with estimated number of triangles in the STL.
// 180 bytes / triangle. For simplicity, ignore compression
const auto fileLen = is.fileSize();
const label nTrisEstimated =
(
(fileLen > 0)
? max(label(100), label(fileLen/180))
: label(100)
);
Detail::STLAsciiParseRagel lexer(nTrisEstimated);
lexer.execute(is.stdStream());
transfer(lexer);

View File

@ -5,7 +5,7 @@
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2018 OpenCFD Ltd.
Copyright (C) 2018-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -33,7 +33,6 @@ Description
#include "STLAsciiParse.H"
#include "STLReader.H"
#include "OSspecific.H"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#pragma GCC diagnostic ignored "-Wunused-const-variable"
@ -67,6 +66,7 @@ Description
// - Only look for initial 'facet '. Ignore 'normal ...'
// - Ignore name for 'endsolid'
//
// ------------------------------------------------------------------------- //
// Ragel machine definition
// Ragel variables (p, pe, eof, cs, top, stack, ts, te, act) defined later...
@ -176,10 +176,10 @@ class STLAsciiParseRagel
public:
//- From input stream and the approximate number of vertices in the STL
STLAsciiParseRagel(const label approxNpoints)
//- From input stream, with the estimated number of triangles in the STL
STLAsciiParseRagel(const label nTrisEstimated)
:
Detail::STLAsciiParse(approxNpoints)
Detail::STLAsciiParse(nTrisEstimated)
{}
//- Execute lexer
@ -232,7 +232,7 @@ void Foam::Detail::STLAsciiParseRagel::execute(std::istream& is)
is.read(data, buflen);
const std::streamsize gcount = is.gcount();
if (!gcount)
if (gcount <= 0)
{
break;
}
@ -303,9 +303,7 @@ void Foam::Detail::STLAsciiParseRagel::die
const char *pe
) const
{
auto error = FatalErrorInFunction;
error
FatalErrorInFunction
<< nl
<< "Parsing error at or near line " << lineNum_
<<", while parsing for " << what << nl
@ -317,22 +315,19 @@ void Foam::Detail::STLAsciiParseRagel::die
for (unsigned i=0; i < 80; ++i)
{
if (*parsing == '\n' || parsing == pe) break;
error << *parsing;
FatalError << *parsing;
++parsing;
}
}
error
FatalError
<< "'\n"
<< exit(FatalError);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
//
// Member Function
//
bool Foam::fileFormats::STLReader::readAsciiRagel
(
const fileName& filename
@ -346,8 +341,20 @@ bool Foam::fileFormats::STLReader::readAsciiRagel
<< exit(FatalError);
}
// Create with approx number of vertices in the STL (from file size)
Detail::STLAsciiParseRagel lexer(Foam::fileSize(filename)/400);
// Create with estimated number of triangles in the STL.
// 180 bytes / triangle. For simplicity, ignore compression
const auto fileLen = is.fileSize();
const label nTrisEstimated =
(
(fileLen > 0)
? max(label(100), label(fileLen/180))
: label(100)
);
Detail::STLAsciiParseRagel lexer(nTrisEstimated);
lexer.execute(is.stdStream());
transfer(lexer);

View File

@ -5,7 +5,7 @@
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2018-2022 OpenCFD Ltd.
Copyright (C) 2018-2023 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -645,7 +645,8 @@ Foam::label Foam::vtk::seriesWriter::scan
header.resize(1024);
is.read(&(header.front()), header.size());
header.resize(is.gcount());
const std::streamsize gcount = is.gcount();
header.erase(gcount <= 0 ? 0 : gcount);
// DebugInfo
// << "got header:\n=====\n" << header << "\n=====\n" << nl;