From 459aaad0f9a727bc7bff0ab9910fe8229fe7c7e8 Mon Sep 17 00:00:00 2001 From: Mark Olesen Date: Tue, 29 Aug 2023 12:27:36 +0200 Subject: [PATCH] ENH: improve robustness of raw reading, file size checks - use ignore instead of seekg/tellg to swallow input (robuster) - check for bad gcount() values - wrap Foam::fileSize() compressed/uncompressed handling into IFstream. - improve handling of compressed files in masterUncollatedFileOperation. Previously read into a string via stream iterators. Now read chunk-wise into a List of char for fewer reallocations. --- applications/test/IFstream/Test-IFstream.C | 43 ++++- .../test/readBroadcast1/Test-readBroadcast1.C | 4 +- src/OpenFOAM/db/IOstreams/Fstreams/IFstream.C | 41 ++++- src/OpenFOAM/db/IOstreams/Fstreams/IFstream.H | 9 +- src/OpenFOAM/db/IOstreams/Sstreams/ISstream.C | 20 +-- .../db/IOstreams/Sstreams/ISstreamI.H | 8 +- .../fileOperation/fileOperationBroadcast.C | 6 +- .../masterUncollatedFileOperation.C | 150 +++++++++++++----- .../ensight/read/ensightReadFile.C | 18 +-- src/fileFormats/stl/STLAsciiParse.H | 4 +- src/fileFormats/stl/STLAsciiParseFlex.L | 34 ++-- src/fileFormats/stl/STLAsciiParseI.H | 6 +- src/fileFormats/stl/STLAsciiParseManual.C | 32 ++-- src/fileFormats/stl/STLAsciiParseRagel.cc | 41 +++-- src/fileFormats/stl/STLAsciiParseRagel.rl | 41 +++-- .../vtk/file/foamVtkSeriesWriter.C | 5 +- 16 files changed, 318 insertions(+), 144 deletions(-) diff --git a/applications/test/IFstream/Test-IFstream.C b/applications/test/IFstream/Test-IFstream.C index c55957109a..c3a193fd21 100644 --- a/applications/test/IFstream/Test-IFstream.C +++ b/applications/test/IFstream/Test-IFstream.C @@ -5,7 +5,7 @@ \\ / A nd | www.openfoam.com \\/ M anipulation | ------------------------------------------------------------------------------- - Copyright (C) 2020 OpenCFD Ltd. + Copyright (C) 2020-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -33,6 +33,7 @@ Description #include "argList.H" #include "Fstream.H" +#include "OSspecific.H" #include "etcFiles.H" using namespace Foam; @@ -44,11 +45,14 @@ int main(int argc, char *argv[]) { argList::noBanner(); argList::noParallel(); + argList::noParallel(); + argList::addOption("ignore", "file", "Test readRaw with ignore"); #include "setRootCase.H" // Test with etc/controlDict (mandatory, from distribution) + if (!args.found("ignore")) { const fileName inputFile ( @@ -97,6 +101,43 @@ int main(int argc, char *argv[]) } } + fileName testFile; + if (args.readIfPresent("ignore", testFile)) + { + if (testFile.has_ext("gz")) + { + testFile.remove_ext(); + Info<< "stripping extraneous .gz ending" << endl; + } + + IFstream is(testFile); + auto& stdStream = is.stdStream(); + + List buffer(1000); + + Info<< "Test readRaw with: " << is.name() + << " compressed:" << int(is.compression()) + << " file-size:" << is.fileSize() << nl; + + for (int iter = 0; is.good() && iter < 1000; ++iter) + { + Info<< "iter:" << iter; + if (iter % 2) + { + Info<< " [read] "; + is.readRaw(buffer.data(), buffer.size()); + } + else + { + Info<< " [ignore]"; + is.readRaw(nullptr, buffer.size() / 2); + } + + Info<< " : " << stdStream.gcount() << endl; + } + } + + Info<< "\nEnd\n" << endl; return 0; } diff --git a/applications/test/readBroadcast1/Test-readBroadcast1.C b/applications/test/readBroadcast1/Test-readBroadcast1.C index 0cc407f61b..fc9821593a 100644 --- a/applications/test/readBroadcast1/Test-readBroadcast1.C +++ b/applications/test/readBroadcast1/Test-readBroadcast1.C @@ -130,7 +130,7 @@ static List slurpFile // 66% compression = 3 iterations // ... - const off_t inputSize = Foam::fileSize(pathname + ".gz"); + const auto inputSize = Foam::fileSize(pathname + ".gz"); const uint64_t chunkSize = ( @@ -202,7 +202,7 @@ static List slurpFile } else { - const off_t inputSize = Foam::fileSize(pathname); + const auto inputSize = Foam::fileSize(pathname); if (inputSize >= 0) { diff --git a/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.C b/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.C index 345734aa10..b9b9b2f318 100644 --- a/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.C +++ b/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.C @@ -27,7 +27,7 @@ License \*---------------------------------------------------------------------------*/ #include "IFstream.H" -#include "OSspecific.H" +#include "OSspecific.H" // For isFile(), fileSize() // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // @@ -91,6 +91,45 @@ Foam::IFstream::IFstream // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // +std::streamsize Foam::IFstream::fileSize() const +{ + const std::istream* ptr = ifstreamPointer::get(); + + if (!ptr || this->name().empty()) + { + return std::streamsize(-1); + } + + off_t fileLen = -1; + + if (IOstreamOption::COMPRESSED == ifstreamPointer::whichCompression()) + { + fileLen = Foam::fileSize(this->name() + ".gz"); + } + else + { + // TBD: special handing for wrapped icharstream + // if + // ( + // const Foam::icharstream* charstr + // = dynamic_cast(ptr)>(ptr) + // ) + // { + // return charstr->capacity(); + // } + + fileLen = Foam::fileSize(this->name()); + } + + if (fileLen >= 0) + { + return std::streamsize(fileLen); + } + + return std::streamsize(-1); +} + + std::istream& Foam::IFstream::stdStream() { std::istream* ptr = ifstreamPointer::get(); diff --git a/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.H b/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.H index f178fd95cc..457c850a62 100644 --- a/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.H +++ b/src/OpenFOAM/db/IOstreams/Fstreams/IFstream.H @@ -6,7 +6,7 @@ \\/ M anipulation | ------------------------------------------------------------------------------- Copyright (C) 2011 OpenFOAM Foundation - Copyright (C) 2017-2021 OpenCFD Ltd. + Copyright (C) 2017-2023 OpenCFD Ltd. ------------------------------------------------------------------------------- License This file is part of OpenFOAM. @@ -94,6 +94,13 @@ public: //- Read/write access to the name of the stream using ISstream::name; + //- Return the size of the underlying file (-1 on error). + //- This corresponds to Foam::fileSize() but with extra handling of + //- compressed files. + // The return type is \c std::streamsize instead of \c off_t. + // \note Use sparingly since it involves a file stat()! + std::streamsize fileSize() const; + // STL stream diff --git a/src/OpenFOAM/db/IOstreams/Sstreams/ISstream.C b/src/OpenFOAM/db/IOstreams/Sstreams/ISstream.C index 7aa75ebd92..48d63eb49a 100644 --- a/src/OpenFOAM/db/IOstreams/Sstreams/ISstream.C +++ b/src/OpenFOAM/db/IOstreams/Sstreams/ISstream.C @@ -1051,20 +1051,7 @@ Foam::Istream& Foam::ISstream::readRaw(char* data, std::streamsize count) } else { - // Forward seek - // - use absolute positioning (see C++ notes about std::ifstream) - is_.seekg(is_.tellg() + std::istream::pos_type(count)); - - // Not sure if this is needed (as per rewind) - // some documentation indicates that ifstream needs - // seekg with values from a tellg - // - // stdStream().rdbuf()->pubseekpos - // ( - // count, - // std::ios_base::seekdir::cur, - // std::ios_base::in - // ); + is_.ignore(count); } } syncState(); @@ -1102,8 +1089,11 @@ void Foam::ISstream::rewind() stdStream().clear(); // Clear the iostate error state flags setGood(); // Sync local copy of iostate - // pubseekpos() rather than seekg() so that it works with gzstream stdStream().rdbuf()->pubseekpos(0, std::ios_base::in); + + // NOTE: this form of rewind does not work with igzstream. + // However, igzstream is usually wrapped as IFstream which has its + // own dedicated rewind treatment for igzstream. } diff --git a/src/OpenFOAM/db/IOstreams/Sstreams/ISstreamI.H b/src/OpenFOAM/db/IOstreams/Sstreams/ISstreamI.H index 84a83df8db..628e404eed 100644 --- a/src/OpenFOAM/db/IOstreams/Sstreams/ISstreamI.H +++ b/src/OpenFOAM/db/IOstreams/Sstreams/ISstreamI.H @@ -76,9 +76,10 @@ inline int Foam::ISstream::peek() inline Foam::ISstream& Foam::ISstream::getLine(std::string& str, char delim) { std::getline(is_, str, delim); + std::streamsize count = is_.gcount(); syncState(); - if (delim == '\n') + if (delim == '\n' && count > 0) { ++lineNumber_; } @@ -90,11 +91,10 @@ inline Foam::ISstream& Foam::ISstream::getLine(std::string& str, char delim) inline std::streamsize Foam::ISstream::getLine(std::nullptr_t, char delim) { is_.ignore(std::numeric_limits::max(), delim); + std::streamsize count = is_.gcount(); syncState(); - std::streamsize count = is_.gcount(); - - if (delim == '\n' && count) + if (delim == '\n' && count > 0) { ++lineNumber_; } diff --git a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationBroadcast.C b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationBroadcast.C index 2d61a91f94..f45eedbbd0 100644 --- a/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationBroadcast.C +++ b/src/OpenFOAM/global/fileOperations/fileOperation/fileOperationBroadcast.C @@ -73,7 +73,11 @@ static void broadcastFile_single if (UPstream::master(comm)) { // Read (see newIFstream) - lengthAndMode.first() = Foam::fileSize(srcName); + auto fileLen = Foam::fileSize(srcName); + if (fileLen > 0) + { + lengthAndMode.first() = uint64_t(fileLen); + } lengthAndMode.second() = Foam::mode(srcName); srcStream.reset diff --git a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C index 5ad84f497c..f7781c32f2 100644 --- a/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C +++ b/src/OpenFOAM/global/fileOperations/masterUncollatedFileOperation/masterUncollatedFileOperation.C @@ -83,6 +83,101 @@ namespace fileOperations } +// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * // + +namespace Foam +{ + +// Get file contents (compressed or uncompressed) +static DynamicList slurpFile(IFstream& ifs) +{ + DynamicList buffer; + + auto& iss = ifs.stdStream(); + + const auto inputSize = ifs.fileSize(); + + if (IOstreamOption::COMPRESSED == ifs.compression()) + { + // For compressed files, no idea how large the result will be. + // So read chunk-wise. + // Using the compressed size for the chunk size: + // 50% compression = 2 iterations + // 66% compression = 3 iterations + // ... + + const uint64_t chunkSize = + ( + (inputSize <= 1024) + ? uint64_t(4096) + : uint64_t(2*inputSize) + ); + + uint64_t beg = 0; + + for (int iter = 1; iter < 100000; ++iter) + { + // Manual resizing to use incremental vs doubling + buffer.setCapacity(label(iter * chunkSize)); + buffer.resize(buffer.capacity()); + + ifs.readRaw(buffer.data() + beg, chunkSize); + const std::streamsize nread = iss.gcount(); + + if + ( + nread < 0 + || nread == std::numeric_limits::max() + ) + { + // Failed, but treat as normal 'done' + buffer.resize(label(beg)); + break; + } + else + { + beg += uint64_t(nread); + if (nread >= 0 && uint64_t(nread) < chunkSize) + { + // normalExit = true; + buffer.resize(label(beg)); + break; + } + } + } + } + else + { + if (inputSize >= 0) + { + buffer.setCapacity(label(inputSize)); + buffer.resize(buffer.capacity()); + + ifs.readRaw(buffer.data(), buffer.size_bytes()); + const std::streamsize nread = iss.gcount(); + + if + ( + nread < 0 + || nread == std::numeric_limits::max() + ) + { + // Failed, but treat as normal 'done' + buffer.clear(); + } + else + { + buffer.resize(label(nread)); // Safety + } + } + } + + return buffer; +} + +} // End namespace Foam + + // * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * // Foam::word @@ -447,56 +542,25 @@ void Foam::fileOperations::masterUncollatedFileOperation::readAndSend if (debug) { - Pout<< "masterUncollatedFileOperation::readAndSend :" + Info<< "masterUncollatedFileOperation::readAndSend :" << " compressed:" << bool(ifs.compression()) << " " << filePath << endl; } - if (ifs.compression() == IOstreamOption::COMPRESSED) + // Read file contents (compressed or uncompressed) into a character buffer + DynamicList buf(slurpFile(ifs)); + + for (const label proci : recvProcs) { - // Could use Foam::fileSize, estimate uncompressed size (eg, 2x) - // and then string reserve followed by string assign... - - // Uncompress and read file contents into a character buffer - const std::string buf - ( - std::istreambuf_iterator(ifs.stdStream()), - std::istreambuf_iterator() - ); - - for (const label proci : recvProcs) - { - UOPstream os(proci, pBufs); - os.write(buf.data(), buf.length()); - } - - if (debug) - { - Pout<< "masterUncollatedFileOperation::readStream :" - << " From " << filePath << " sent " << buf.size() - << " bytes" << endl; - } + UOPstream os(proci, pBufs); + os.write(buf.cdata_bytes(), buf.size_bytes()); } - else + + if (debug) { - const off_t count(Foam::fileSize(filePath)); - - // Read file contents into a character buffer - List buf(static_cast