BUG: string wrapping (eg, argList help) truncates character (#2625)

- had an off-by-one in the accounting for some corner cases,
  partly because the logic was a bit convoluted

ENH: improved string wrapping (#2625)

- reworked logic (like a state machine) to handle backtracking
  with fallback of splitting near punctuation characters.

  Still doesn't compete with nroff or TeX, but does avoid long lines
  and many funny splits.  With this change the help for mapFieldsPar
  now looks like this:

  =====
      Specify the mapping method
      (direct|mapNearest|cellVolumeWeight|
      correctedCellVolumeWeight)
  =====

  Since the list of options is very long without any spaces, it takes
  '|' as the best split point, which definitely reads better
This commit is contained in:
Mark Olesen 2022-11-18 20:15:54 +01:00
parent d9ab5d54ef
commit d7bf2d400d
4 changed files with 238 additions and 58 deletions

View File

@ -0,0 +1,3 @@
Test-write-wrapped-string.C
EXE = $(FOAM_USER_APPBIN)/Test-write-wrapped-string

View File

@ -0,0 +1,2 @@
/* EXE_INC = */
/* EXE_LIBS = */

View File

@ -0,0 +1,93 @@
/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2022 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
OpenFOAM is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
Application
Test-write-wrapped-string
Description
Simple tests for wrapped strings
\*---------------------------------------------------------------------------*/
#include "argList.H"
#include "stringOps.H"
using namespace Foam;
//- Show a string and its wrapped form, framed by '=' rulers.
//
//  Writes a short header (string size, the raw text followed by a
//  "<<<<" end marker, and the indent/width settings) to the Info stream,
//  then the result of stringOps::writeWrapped between two rulers that
//  are each \p width characters wide.
//
//  \param str     the text to be wrapped
//  \param width   width passed to writeWrapped, also the ruler length
//  \param indent  indent passed to writeWrapped
void print(const std::string& str, std::size_t width, std::size_t indent=0)
{
    auto& os = Info();

    // A ruler line: 'width' copies of '=', emitted one character at a time
    const auto writeRuler = [&]()
    {
        std::size_t remaining = width;
        while (remaining)
        {
            os << '=';
            --remaining;
        }
        os << endl;
    };

    // Header with the raw input and the wrapping parameters
    os << nl;
    os << "string[" << str.size() << "]" << nl;
    os << str.c_str() << "<<<<" << nl;
    os << "indent:" << indent << " width:" << width << endl;

    writeRuler();
    stringOps::writeWrapped(os, str, width, indent);
    writeRuler();
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
//- Test driver: print the wrapped form of two fixed sample strings,
//  each with width 10 and indent 4.
int main(int argc, char *argv[])
{
    argList::noBanner();
    argList::noParallel();
    #include "setRootCase.H"

    // Sample containing embedded newlines, a tab and a space
    const string mixedText
    (
        "123456789-12345\n\n"
        "6789-12\t"
        "xyz3456789-1234 56789-123456789-"
    );
    print(mixedText, 10, 4);

    // Sample without any whitespace, shorter than the width
    const string shortText("ABCDEFGHI");
    print(shortText, 10, 4);

    Info<< "\nEnd\n" << endl;

    return 0;
}
// ************************************************************************* //

View File

@ -1217,12 +1217,40 @@ void Foam::stringOps::writeWrapped
const bool escape const bool escape
) )
{ {
const auto len = str.length(); // Disabled below some minimal lower limit
if (width <= 8)
{
char c = 0;
std::string::size_type pos = 0; const auto len = str.size();
// Handle leading newlines for (std::string::size_type pos = 0; pos < len; ++pos)
while (str[pos] == '\n' && pos < len) {
c = str[pos];
if (escape && c == '\\')
{
os << '\\';
}
os << c;
}
// Trailing newline for non-empty string and if still pending
if (len && c != '\n')
{
os << '\n';
}
return;
}
// Normal case
std::size_t pos = 0;
const auto len = str.size();
// Output leading newlines without any indention
while (pos < len && str[pos] == '\n')
{ {
os << '\n'; os << '\n';
++pos; ++pos;
@ -1230,85 +1258,139 @@ void Foam::stringOps::writeWrapped
while (pos < len) while (pos < len)
{ {
// Potential end point and next point // Potential end point, break point and next point
std::string::size_type end = pos + width - 1; std::string::size_type endp = pos + width;
std::string::size_type eol = str.find('\n', pos); std::string::size_type breakp = str.find('\n', pos);
std::string::size_type next = string::npos; std::string::size_type nextp = endp;
if (end >= len) if (std::string::npos != breakp && breakp < endp)
{ {
// No more wrapping needed // Embedded line break
end = len; endp = breakp;
nextp = breakp + 1; // Skip this newline in the next chunk
if (std::string::npos != eol && eol <= end) // Trim trailing space
while
(
(endp > pos)
&& (str[endp-1] == ' ' || str[endp-1] == '\t')
)
{ {
// Manual '\n' break, next follows it (default behaviour) --endp;
end = eol;
} }
} }
else if (std::string::npos != eol && eol <= end) else if (endp >= len)
{ {
// Manual '\n' break, next follows it (default behaviour) // Can output the rest without any wrapping, no line-breaks
end = eol; nextp = endp = len;
}
else if (isspace(str[end]))
{
// Ended on a space - can use this directly
next = str.find_first_not_of(" \t\n", end); // Next non-space
}
else if (isspace(str[end+1]))
{
// The next one is a space - so we are okay
++end; // Otherwise the length is wrong
next = str.find_first_not_of(" \t\n", end); // Next non-space
} }
else else
{ {
// Line break will be mid-word // Find a good point to break the string
auto prev = str.find_last_of(" \t\n", end); // Prev word break // try to find space/tab, or use punctuation as a fallback
if (std::string::npos != prev && prev > pos) breakp = nextp = endp;
std::string::size_type punc = std::string::npos;
// Backtrack to find whitespace
bool foundBreak = false;
while (breakp > pos)
{ {
end = prev; --breakp;
next = prev + 1; // Continue from here
}
}
// The next position to continue from const char c = str[breakp];
if (std::string::npos == next)
{
next = end + 1;
}
// Has a length if (c == ' ' || c == '\t')
if (end > pos)
{
// Indent following lines.
// The first one was already done prior to calling this routine.
if (pos)
{
for (std::string::size_type i = 0; i < indent; ++i)
{ {
os <<' '; foundBreak = true;
endp = breakp;
// Found a space, but continue loop anyhow
// (trims trailing space)
}
else if (foundBreak)
{
// Non-whitespace encountered while consuming
// trailing space. We are done
break;
}
else
{
// Potentially viable as last non-whitespace?
nextp = breakp;
// Remember if we see any punctuation characters
// - useful later as fallback
if (punc == std::string::npos)
{
switch (c)
{
// Break before the punctuation
case '(' : case '<' :
{
punc = breakp;
break;
}
// Break after the punctuation
case ')' : case '>' :
case ',' : case '.' :
case ':' : case ';' :
case '/' : case '|' :
{
punc = (breakp + 1);
break;
}
}
}
} }
} }
while (pos < end) if (!foundBreak)
{ {
const char c = str[pos]; // No whitespace breaks, but perhaps a punctuation break.
// Otherwise can't do much else
if (escape && c == '\\') if (punc != std::string::npos)
{ {
os << '\\'; nextp = endp = punc;
}
else
{
nextp = endp;
} }
os << c;
++pos;
} }
os << nl;
} }
pos = next;
// Output
// ~~~~~~
// Indent subsequent lines.
// - assuming the one was done prior to calling this routine.
// - no extra indent if it will only have a newline
if (pos && (pos < endp))
{
// Put indent
for (std::string::size_type i = 0; i < indent; ++i)
{
os << ' ';
}
}
while (pos < endp)
{
const char c = str[pos];
if (escape && c == '\\')
{
os << '\\';
}
os << c;
++pos;
}
os << nl;
pos = nextp;
} }
} }